From f7b90a7edcb321870a6de99dfca1231c8e44b864 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 28 Feb 2015 15:09:04 +0100 Subject: [PATCH 01/58] move (de)compression methods into separate compressor instance --- attic/key.py | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/attic/key.py b/attic/key.py index ef623f36..349211ae 100644 --- a/attic/key.py +++ b/attic/key.py @@ -44,10 +44,27 @@ def key_factory(repository, manifest_data): raise UnsupportedPayloadError(manifest_data[0]) +class CompressionBase(object): + def compress(self, data): + pass + + def decompress(self, data): + pass + + +class ZlibCompression(CompressionBase): + def compress(self, data): + return zlib.compress(data) + + def decompress(self, data): + return zlib.decompress(data) + + class KeyBase(object): def __init__(self): self.TYPE_STR = bytes([self.TYPE]) + self.compressor = ZlibCompression() def id_hash(self, data): """Return HMAC hash using the "id" HMAC key @@ -78,12 +95,12 @@ class PlaintextKey(KeyBase): return sha256(data).digest() def encrypt(self, data): - return b''.join([self.TYPE_STR, zlib.compress(data)]) + return b''.join([self.TYPE_STR, self.compressor.compress(data)]) def decrypt(self, id, data): if data[0] != self.TYPE: raise IntegrityError('Invalid encryption envelope') - data = zlib.decompress(memoryview(data)[1:]) + data = self.compressor.decompress(memoryview(data)[1:]) if id and sha256(data).digest() != id: raise IntegrityError('Chunk id verification failed') return data @@ -110,7 +127,7 @@ class AESKeyBase(KeyBase): return HMAC(self.id_key, data, sha256).digest() def encrypt(self, data): - data = zlib.compress(data) + data = self.compressor.compress(data) self.enc_cipher.reset() data = b''.join((self.enc_cipher.iv[8:], self.enc_cipher.encrypt(data))) hmac = HMAC(self.enc_hmac_key, data, sha256).digest() @@ -123,7 +140,7 @@ class AESKeyBase(KeyBase): if memoryview(HMAC(self.enc_hmac_key, memoryview(data)[33:], 
sha256).digest()) != hmac: raise IntegrityError('Encryption envelope checksum mismatch') self.dec_cipher.reset(iv=PREFIX + data[33:41]) - data = zlib.decompress(self.dec_cipher.decrypt(data[41:])) # should use memoryview + data = self.compressor.decompress(self.dec_cipher.decrypt(data[41:])) # should use memoryview if id and HMAC(self.id_key, data, sha256).digest() != id: raise IntegrityError('Chunk id verification failed') return data From 61af8a4babe4b09a995d9446b270a9624aa87383 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 28 Feb 2015 16:54:14 +0100 Subject: [PATCH 02/58] add lzma ("xz") compressor (not used yet) lzma (known from .xz files) can compress better than zlib, but takes significantly more time for that. Note: compressor is set up in attic.key.KeyBase and currently still uses ZlibCompressor. This changeset is primarily for experimenting with lzma and also to keep changesets clean. Selection, auto-detection and parametrization of compression method is still TODO. 
--- attic/key.py | 26 +++++++++++++++++++++++--- setup.py | 6 +++++- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/attic/key.py b/attic/key.py index 349211ae..81ad4c22 100644 --- a/attic/key.py +++ b/attic/key.py @@ -7,6 +7,14 @@ import hmac from hashlib import sha256 import zlib +try: + import lzma # python >= 3.3 +except ImportError: + try: + from backports import lzma # backports.lzma from pypi + except ImportError: + lzma = None + from attic.crypto import pbkdf2_sha256, get_random_bytes, AES, bytes_to_long, long_to_bytes, bytes_to_int, num_aes_blocks from attic.helpers import IntegrityError, get_keys_dir, Error @@ -44,7 +52,7 @@ def key_factory(repository, manifest_data): raise UnsupportedPayloadError(manifest_data[0]) -class CompressionBase(object): +class CompressorBase(object): def compress(self, data): pass @@ -52,7 +60,7 @@ class CompressionBase(object): pass -class ZlibCompression(CompressionBase): +class ZlibCompressor(CompressorBase): def compress(self, data): return zlib.compress(data) @@ -60,11 +68,23 @@ class ZlibCompression(CompressionBase): return zlib.decompress(data) +class LzmaCompressor(CompressorBase): + def __init__(self): + if lzma is None: + raise NotImplemented("lzma compression needs Python >= 3.3 or backports.lzma from PyPi") + + def compress(self, data): + return lzma.compress(data) + + def decompress(self, data): + return lzma.decompress(data) + + class KeyBase(object): def __init__(self): self.TYPE_STR = bytes([self.TYPE]) - self.compressor = ZlibCompression() + self.compressor = ZlibCompressor() def id_hash(self, data): """Return HMAC hash using the "id" HMAC key diff --git a/setup.py b/setup.py index 2c1432b1..573e74f4 100644 --- a/setup.py +++ b/setup.py @@ -96,6 +96,10 @@ elif platform == 'FreeBSD': elif platform == 'Darwin': ext_modules.append(Extension('attic.platform_darwin', [platform_darwin_source])) +install_requires = ['msgpack-python'] +if sys.version_info < (3, 3): + 
install_requires.append('backports.lzma') + setup( name='Attic', version=versioneer.get_version(), @@ -122,5 +126,5 @@ setup( scripts=['scripts/attic'], cmdclass=cmdclass, ext_modules=ext_modules, - install_requires=['msgpack-python'] + install_requires=install_requires, ) From 7b1a3dcd5d91c0eb02888e21cdb5bb9fab06b005 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 28 Feb 2015 19:51:51 +0100 Subject: [PATCH 03/58] implement compression flexibility (attic init -c / --compression zlib|lzma) I had to do a bit of bit-fiddling to preserve backwards compatibility. Previous code used just 1 byte to determine encryption type and compression was hardcoded to be zlib. It uses type bytes 0x00, 0x01 and 0x02 for that. The record layout was rather fixed and there was no variable length part to add a compression type byte. So I split that type byte: the upper 4bits are compression (0 means zlib as before), the lower 4 bits are for encryption. --- attic/archiver.py | 3 ++ attic/key.py | 73 ++++++++++++++++++++++++++++---------- attic/testsuite/archive.py | 4 +-- attic/testsuite/key.py | 1 + 4 files changed, 61 insertions(+), 20 deletions(-) diff --git a/attic/archiver.py b/attic/archiver.py index 47650c2d..43c99fac 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -475,6 +475,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") subparser.add_argument('-e', '--encryption', dest='encryption', choices=('none', 'passphrase', 'keyfile'), default='none', help='select encryption method') + subparser.add_argument('-c', '--compression', dest='compression', + choices=('zlib', 'lzma'), default='zlib', + help='select compression method') check_epilog = textwrap.dedent(""" The check command verifies the consistency of a repository and the corresponding diff --git a/attic/key.py b/attic/key.py index 81ad4c22..a36448a6 100644 --- a/attic/key.py +++ b/attic/key.py @@ -35,24 +35,30 @@ class HMAC(hmac.HMAC): def key_creator(repository, args): if 
args.encryption == 'keyfile': return KeyfileKey.create(repository, args) - elif args.encryption == 'passphrase': + if args.encryption == 'passphrase': return PassphraseKey.create(repository, args) - else: + if args.encryption == 'none': return PlaintextKey.create(repository, args) + raise NotImplemented(args.encryption) def key_factory(repository, manifest_data): - if manifest_data[0] == KeyfileKey.TYPE: + # key type is determined by 4 lower bits of the type byte + key_type = manifest_data[0] & 0x0f + if key_type == KeyfileKey.TYPE: return KeyfileKey.detect(repository, manifest_data) - elif manifest_data[0] == PassphraseKey.TYPE: + if key_type == PassphraseKey.TYPE: return PassphraseKey.detect(repository, manifest_data) - elif manifest_data[0] == PlaintextKey.TYPE: + if key_type == PlaintextKey.TYPE: return PlaintextKey.detect(repository, manifest_data) - else: - raise UnsupportedPayloadError(manifest_data[0]) + raise UnsupportedPayloadError(manifest_data[0]) class CompressorBase(object): + @classmethod + def create(cls, args): + return cls() + def compress(self, data): pass @@ -61,6 +67,8 @@ class CompressorBase(object): class ZlibCompressor(CompressorBase): + TYPE = 0x00 # must be 0x00 for backwards compatibility + def compress(self, data): return zlib.compress(data) @@ -69,6 +77,8 @@ class ZlibCompressor(CompressorBase): class LzmaCompressor(CompressorBase): + TYPE = 0x10 + def __init__(self): if lzma is None: raise NotImplemented("lzma compression needs Python >= 3.3 or backports.lzma from PyPi") @@ -80,11 +90,31 @@ class LzmaCompressor(CompressorBase): return lzma.decompress(data) +def compressor_creator(args): + if args is None: # used by unit tests + return ZlibCompressor.create(args) + if args.compression == 'lzma': + return LzmaCompressor.create(args) + if args.compression == 'zlib': + return ZlibCompressor.create(args) + raise NotImplemented(args.compression) + + +def compressor_factory(manifest_data): + # compression is determined by 4 upper bits of the 
type byte + compression_type = manifest_data[0] & 0xf0 + if compression_type == ZlibCompressor.TYPE: + return ZlibCompressor() + if compression_type == LzmaCompressor.TYPE: + return LzmaCompressor() + raise UnsupportedPayloadError(manifest_data[0]) + + class KeyBase(object): - def __init__(self): - self.TYPE_STR = bytes([self.TYPE]) - self.compressor = ZlibCompressor() + def __init__(self, compressor): + self.compressor = compressor + self.TYPE_STR = bytes([self.TYPE | self.compressor.TYPE]) def id_hash(self, data): """Return HMAC hash using the "id" HMAC key @@ -105,11 +135,13 @@ class PlaintextKey(KeyBase): @classmethod def create(cls, repository, args): print('Encryption NOT enabled.\nUse the "--encryption=passphrase|keyfile" to enable encryption.') - return cls() + compressor = compressor_creator(args) + return cls(compressor) @classmethod def detect(cls, repository, manifest_data): - return cls() + compressor = compressor_factory(manifest_data) + return cls(compressor) def id_hash(self, data): return sha256(data).digest() @@ -118,8 +150,9 @@ class PlaintextKey(KeyBase): return b''.join([self.TYPE_STR, self.compressor.compress(data)]) def decrypt(self, id, data): - if data[0] != self.TYPE: - raise IntegrityError('Invalid encryption envelope') + type_str = bytes([data[0]]) + if type_str != self.TYPE_STR: + raise IntegrityError('Invalid encryption envelope %r' % type_str) data = self.compressor.decompress(memoryview(data)[1:]) if id and sha256(data).digest() != id: raise IntegrityError('Chunk id verification failed') @@ -191,7 +224,8 @@ class PassphraseKey(AESKeyBase): @classmethod def create(cls, repository, args): - key = cls() + compressor = compressor_creator(args) + key = cls(compressor) passphrase = os.environ.get('ATTIC_PASSPHRASE') if passphrase is not None: passphrase2 = passphrase @@ -213,7 +247,8 @@ class PassphraseKey(AESKeyBase): @classmethod def detect(cls, repository, manifest_data): prompt = 'Enter passphrase for %s: ' % repository._location.orig 
- key = cls() + compressor = compressor_factory(manifest_data) + key = cls(compressor) passphrase = os.environ.get('ATTIC_PASSPHRASE') if passphrase is None: passphrase = getpass(prompt) @@ -238,7 +273,8 @@ class KeyfileKey(AESKeyBase): @classmethod def detect(cls, repository, manifest_data): - key = cls() + compressor = compressor_factory(manifest_data) + key = cls(compressor) path = cls.find_key_file(repository) prompt = 'Enter passphrase for key file %s: ' % path passphrase = os.environ.get('ATTIC_PASSPHRASE', '') @@ -346,7 +382,8 @@ class KeyfileKey(AESKeyBase): passphrase2 = getpass('Enter same passphrase again: ') if passphrase != passphrase2: print('Passphrases do not match') - key = cls() + compressor = compressor_creator(args) + key = cls(compressor) key.repository_id = repository.id key.init_from_random_data(get_random_bytes(100)) key.init_ciphers() diff --git a/attic/testsuite/archive.py b/attic/testsuite/archive.py index 8d478f5f..bdc7aea5 100644 --- a/attic/testsuite/archive.py +++ b/attic/testsuite/archive.py @@ -1,7 +1,7 @@ import msgpack from attic.testsuite import AtticTestCase from attic.archive import CacheChunkBuffer, RobustUnpacker -from attic.key import PlaintextKey +from attic.key import PlaintextKey, ZlibCompressor class MockCache: @@ -19,7 +19,7 @@ class ChunkBufferTestCase(AtticTestCase): def test(self): data = [{b'foo': 1}, {b'bar': 2}] cache = MockCache() - key = PlaintextKey() + key = PlaintextKey(ZlibCompressor()) chunks = CacheChunkBuffer(cache, key, None) for d in data: chunks.add(d) diff --git a/attic/testsuite/key.py b/attic/testsuite/key.py index 543d1be3..5e191abe 100644 --- a/attic/testsuite/key.py +++ b/attic/testsuite/key.py @@ -13,6 +13,7 @@ class KeyTestCase(AtticTestCase): class MockArgs(object): repository = Location(tempfile.mkstemp()[1]) + compression = 'zlib' keyfile2_key_file = """ ATTIC KEY 0000000000000000000000000000000000000000000000000000000000000000 From b305c79ee5650ead397f4c2a4e600bd63f250225 Mon Sep 17 
00:00:00 2001 From: Thomas Waldmann Date: Sat, 28 Feb 2015 19:56:50 +0100 Subject: [PATCH 04/58] support --compression none --- attic/archiver.py | 2 +- attic/key.py | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/attic/archiver.py b/attic/archiver.py index 43c99fac..82d77bc5 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -476,7 +476,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") choices=('none', 'passphrase', 'keyfile'), default='none', help='select encryption method') subparser.add_argument('-c', '--compression', dest='compression', - choices=('zlib', 'lzma'), default='zlib', + choices=('none', 'zlib', 'lzma'), default='zlib', help='select compression method') check_epilog = textwrap.dedent(""" diff --git a/attic/key.py b/attic/key.py index a36448a6..16adea14 100644 --- a/attic/key.py +++ b/attic/key.py @@ -90,6 +90,16 @@ class LzmaCompressor(CompressorBase): return lzma.decompress(data) +class NullCompressor(CompressorBase): + TYPE = 0x20 + + def compress(self, data): + return data + + def decompress(self, data): + return data + + def compressor_creator(args): if args is None: # used by unit tests return ZlibCompressor.create(args) @@ -97,6 +107,8 @@ def compressor_creator(args): return LzmaCompressor.create(args) if args.compression == 'zlib': return ZlibCompressor.create(args) + if args.compression == 'none': + return NullCompressor.create(args) raise NotImplemented(args.compression) @@ -107,6 +119,8 @@ def compressor_factory(manifest_data): return ZlibCompressor() if compression_type == LzmaCompressor.TYPE: return LzmaCompressor() + if compression_type == NullCompressor.TYPE: + return NullCompressor() raise UnsupportedPayloadError(manifest_data[0]) From 07d2df8cef923cb3b079e1fcc206f35719a29234 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 28 Feb 2015 20:02:48 +0100 Subject: [PATCH 05/58] key module: just reordered functions/classes, no functional change now compression 
stuff is at top, encryption/key stuff at bottom --- attic/key.py | 72 ++++++++++++++++++++++++++-------------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/attic/key.py b/attic/key.py index 16adea14..0bdfddd3 100644 --- a/attic/key.py +++ b/attic/key.py @@ -32,25 +32,27 @@ class HMAC(hmac.HMAC): self.inner.update(msg) -def key_creator(repository, args): - if args.encryption == 'keyfile': - return KeyfileKey.create(repository, args) - if args.encryption == 'passphrase': - return PassphraseKey.create(repository, args) - if args.encryption == 'none': - return PlaintextKey.create(repository, args) - raise NotImplemented(args.encryption) +def compressor_creator(args): + if args is None: # used by unit tests + return ZlibCompressor.create(args) + if args.compression == 'zlib': + return ZlibCompressor.create(args) + if args.compression == 'lzma': + return LzmaCompressor.create(args) + if args.compression == 'none': + return NullCompressor.create(args) + raise NotImplemented(args.compression) -def key_factory(repository, manifest_data): - # key type is determined by 4 lower bits of the type byte - key_type = manifest_data[0] & 0x0f - if key_type == KeyfileKey.TYPE: - return KeyfileKey.detect(repository, manifest_data) - if key_type == PassphraseKey.TYPE: - return PassphraseKey.detect(repository, manifest_data) - if key_type == PlaintextKey.TYPE: - return PlaintextKey.detect(repository, manifest_data) +def compressor_factory(manifest_data): + # compression is determined by 4 upper bits of the type byte + compression_type = manifest_data[0] & 0xf0 + if compression_type == ZlibCompressor.TYPE: + return ZlibCompressor() + if compression_type == LzmaCompressor.TYPE: + return LzmaCompressor() + if compression_type == NullCompressor.TYPE: + return NullCompressor() raise UnsupportedPayloadError(manifest_data[0]) @@ -100,27 +102,25 @@ class NullCompressor(CompressorBase): return data -def compressor_creator(args): - if args is None: # used by unit 
tests - return ZlibCompressor.create(args) - if args.compression == 'lzma': - return LzmaCompressor.create(args) - if args.compression == 'zlib': - return ZlibCompressor.create(args) - if args.compression == 'none': - return NullCompressor.create(args) - raise NotImplemented(args.compression) +def key_creator(repository, args): + if args.encryption == 'keyfile': + return KeyfileKey.create(repository, args) + if args.encryption == 'passphrase': + return PassphraseKey.create(repository, args) + if args.encryption == 'none': + return PlaintextKey.create(repository, args) + raise NotImplemented(args.encryption) -def compressor_factory(manifest_data): - # compression is determined by 4 upper bits of the type byte - compression_type = manifest_data[0] & 0xf0 - if compression_type == ZlibCompressor.TYPE: - return ZlibCompressor() - if compression_type == LzmaCompressor.TYPE: - return LzmaCompressor() - if compression_type == NullCompressor.TYPE: - return NullCompressor() +def key_factory(repository, manifest_data): + # key type is determined by 4 lower bits of the type byte + key_type = manifest_data[0] & 0x0f + if key_type == KeyfileKey.TYPE: + return KeyfileKey.detect(repository, manifest_data) + if key_type == PassphraseKey.TYPE: + return PassphraseKey.detect(repository, manifest_data) + if key_type == PlaintextKey.TYPE: + return PlaintextKey.detect(repository, manifest_data) raise UnsupportedPayloadError(manifest_data[0]) From a8b116d965c1c6ee804adb22fb9bfc49f51a2c75 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 28 Feb 2015 20:51:39 +0100 Subject: [PATCH 06/58] fix type byte checks, deduplicate code --- attic/key.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/attic/key.py b/attic/key.py index 0bdfddd3..30b1ffb7 100644 --- a/attic/key.py +++ b/attic/key.py @@ -130,6 +130,11 @@ class KeyBase(object): self.compressor = compressor self.TYPE_STR = bytes([self.TYPE | self.compressor.TYPE]) + def type_check(self, type_byte): 
+ type_str = bytes([type_byte]) + if type_str != self.TYPE_STR: + raise IntegrityError('Invalid encryption envelope %r' % type_str) + def id_hash(self, data): """Return HMAC hash using the "id" HMAC key """ @@ -164,9 +169,7 @@ class PlaintextKey(KeyBase): return b''.join([self.TYPE_STR, self.compressor.compress(data)]) def decrypt(self, id, data): - type_str = bytes([data[0]]) - if type_str != self.TYPE_STR: - raise IntegrityError('Invalid encryption envelope %r' % type_str) + self.type_check(data[0]) data = self.compressor.decompress(memoryview(data)[1:]) if id and sha256(data).digest() != id: raise IntegrityError('Chunk id verification failed') @@ -201,8 +204,7 @@ class AESKeyBase(KeyBase): return b''.join((self.TYPE_STR, hmac, data)) def decrypt(self, id, data): - if data[0] != self.TYPE: - raise IntegrityError('Invalid encryption envelope') + self.type_check(data[0]) hmac = memoryview(data)[1:33] if memoryview(HMAC(self.enc_hmac_key, memoryview(data)[33:], sha256).digest()) != hmac: raise IntegrityError('Encryption envelope checksum mismatch') @@ -213,8 +215,7 @@ class AESKeyBase(KeyBase): return data def extract_nonce(self, payload): - if payload[0] != self.TYPE: - raise IntegrityError('Invalid encryption envelope') + self.type_check(payload[0]) nonce = bytes_to_long(payload[33:41]) return nonce From ff542e612ad9a2afd7ae10825a97043da60517c2 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 2 Mar 2015 21:34:19 +0100 Subject: [PATCH 07/58] flexible type header parsing, compression levels, sha256 + sha512_256 e.g.: attic init --encryption=none --compression 0 --mac 1 repo.attic Note: Numeric --compression and --mac values are a bit simplistic, but even if one used lots of string choices there, one would still have to look them up in the docs. 
--- attic/archiver.py | 9 +- attic/key.py | 339 ++++++++++++++++++++++++------------ attic/testsuite/archive.py | 9 +- attic/testsuite/archiver.py | 4 +- attic/testsuite/key.py | 9 +- 5 files changed, 252 insertions(+), 118 deletions(-) diff --git a/attic/archiver.py b/attic/archiver.py index 82d77bc5..f0d82662 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -13,7 +13,7 @@ from attic import __version__ from attic.archive import Archive, ArchiveChecker from attic.repository import Repository from attic.cache import Cache -from attic.key import key_creator +from attic.key import key_creator, COMPR_DEFAULT from attic.helpers import Error, location_validator, format_time, \ format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, \ get_cache_dir, get_keys_dir, format_timedelta, prune_within, prune_split, \ @@ -476,8 +476,11 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") choices=('none', 'passphrase', 'keyfile'), default='none', help='select encryption method') subparser.add_argument('-c', '--compression', dest='compression', - choices=('none', 'zlib', 'lzma'), default='zlib', - help='select compression method') + type=int, default=COMPR_DEFAULT, metavar='METHOD', + help='select compression method (0..19)') + subparser.add_argument('-m', '--mac', dest='mac', + type=int, default=None, metavar='METHOD', + help='select hash/mac method (0..3)') check_epilog = textwrap.dedent(""" The check command verifies the consistency of a repository and the corresponding diff --git a/attic/key.py b/attic/key.py index 30b1ffb7..c8835c19 100644 --- a/attic/key.py +++ b/attic/key.py @@ -4,7 +4,7 @@ import os import msgpack import textwrap import hmac -from hashlib import sha256 +from hashlib import sha256, sha512 import zlib try: @@ -25,6 +25,21 @@ class UnsupportedPayloadError(Error): """Unsupported payload type {}. A newer version is required to access this repository. 
""" +class sha512_256(object): # note: can't subclass sha512 + """sha512, but digest truncated to 256bit - faster than sha256 on 64bit platforms""" + def __init__(self, data=b''): + self.h = sha512(data) + + def update(self, data): + self.h.update(data) + + def digest(self): + return self.h.digest()[:32] + + def hexdigest(self): + return self.h.hexdigest()[:64] + + class HMAC(hmac.HMAC): """Workaround a bug in Python < 3.4 Where HMAC does not accept memoryviews """ @@ -32,108 +47,97 @@ class HMAC(hmac.HMAC): self.inner.update(msg) -def compressor_creator(args): - if args is None: # used by unit tests - return ZlibCompressor.create(args) - if args.compression == 'zlib': - return ZlibCompressor.create(args) - if args.compression == 'lzma': - return LzmaCompressor.create(args) - if args.compression == 'none': - return NullCompressor.create(args) - raise NotImplemented(args.compression) +class SHA256(object): # note: can't subclass sha256 + TYPE = 0x00 + + def __init__(self, key, data=b''): + # signature is like for a MAC, we ignore the key as this is a simple hash + if key is not None: + raise Exception("use a HMAC if you have a key") + self.h = sha256(data) + + def update(self, data): + self.h.update(data) + + def digest(self): + return self.h.digest() + + def hexdigest(self): + return self.h.hexdigest() -def compressor_factory(manifest_data): - # compression is determined by 4 upper bits of the type byte - compression_type = manifest_data[0] & 0xf0 - if compression_type == ZlibCompressor.TYPE: - return ZlibCompressor() - if compression_type == LzmaCompressor.TYPE: - return LzmaCompressor() - if compression_type == NullCompressor.TYPE: - return NullCompressor() - raise UnsupportedPayloadError(manifest_data[0]) +class SHA512_256(sha512_256): + """sha512, but digest truncated to 256bit - faster than sha256 on 64bit platforms""" + TYPE = 0x01 + + def __init__(self, key, data): + # signature is like for a MAC, we ignore the key as this is a simple hash + if key is not 
None: + raise Exception("use a HMAC if you have a key") + super().__init__(data) -class CompressorBase(object): - @classmethod - def create(cls, args): - return cls() +HASH_DEFAULT = SHA256.TYPE + + +class HMAC_SHA256(HMAC): + TYPE = 0x02 + + def __init__(self, key, data): + if key is None: + raise Exception("do not use HMAC if you don't have a key") + super().__init__(key, data, sha256) + + +class HMAC_SHA512_256(HMAC): + TYPE = 0x03 + + def __init__(self, key, data): + if key is None: + raise Exception("do not use HMAC if you don't have a key") + super().__init__(key, data, sha512_256) + + +MAC_DEFAULT = HMAC_SHA256.TYPE + + +class ZlibCompressor(object): # uses 0..9 in the mapping + TYPE = 0 + LEVELS = range(10) def compress(self, data): - pass - - def decompress(self, data): - pass - - -class ZlibCompressor(CompressorBase): - TYPE = 0x00 # must be 0x00 for backwards compatibility - - def compress(self, data): - return zlib.compress(data) + level = self.TYPE - ZlibCompressor.TYPE + return zlib.compress(data, level) def decompress(self, data): return zlib.decompress(data) -class LzmaCompressor(CompressorBase): - TYPE = 0x10 +class LzmaCompressor(object): # uses 10..19 in the mapping + TYPE = 10 + PRESETS = range(10) def __init__(self): if lzma is None: raise NotImplemented("lzma compression needs Python >= 3.3 or backports.lzma from PyPi") def compress(self, data): - return lzma.compress(data) + preset = self.TYPE - LzmaCompressor.TYPE + return lzma.compress(data, preset=preset) def decompress(self, data): return lzma.decompress(data) -class NullCompressor(CompressorBase): - TYPE = 0x20 - - def compress(self, data): - return data - - def decompress(self, data): - return data - - -def key_creator(repository, args): - if args.encryption == 'keyfile': - return KeyfileKey.create(repository, args) - if args.encryption == 'passphrase': - return PassphraseKey.create(repository, args) - if args.encryption == 'none': - return PlaintextKey.create(repository, args) - raise 
NotImplemented(args.encryption) - - -def key_factory(repository, manifest_data): - # key type is determined by 4 lower bits of the type byte - key_type = manifest_data[0] & 0x0f - if key_type == KeyfileKey.TYPE: - return KeyfileKey.detect(repository, manifest_data) - if key_type == PassphraseKey.TYPE: - return PassphraseKey.detect(repository, manifest_data) - if key_type == PlaintextKey.TYPE: - return PlaintextKey.detect(repository, manifest_data) - raise UnsupportedPayloadError(manifest_data[0]) +COMPR_DEFAULT = ZlibCompressor.TYPE + 6 # zlib level 6 class KeyBase(object): + TYPE = 0x00 # override in derived classes - def __init__(self, compressor): - self.compressor = compressor - self.TYPE_STR = bytes([self.TYPE | self.compressor.TYPE]) - - def type_check(self, type_byte): - type_str = bytes([type_byte]) - if type_str != self.TYPE_STR: - raise IntegrityError('Invalid encryption envelope %r' % type_str) + def __init__(self, compressor, maccer): + self.compressor = compressor() + self.maccer = maccer def id_hash(self, data): """Return HMAC hash using the "id" HMAC key @@ -155,23 +159,27 @@ class PlaintextKey(KeyBase): def create(cls, repository, args): print('Encryption NOT enabled.\nUse the "--encryption=passphrase|keyfile" to enable encryption.') compressor = compressor_creator(args) - return cls(compressor) + maccer = maccer_creator(args, cls) + return cls(compressor, maccer) @classmethod def detect(cls, repository, manifest_data): - compressor = compressor_factory(manifest_data) - return cls(compressor) + offset, compressor, crypter, maccer = parser(manifest_data) + return cls(compressor, maccer) def id_hash(self, data): - return sha256(data).digest() + return self.maccer(None, data).digest() def encrypt(self, data): - return b''.join([self.TYPE_STR, self.compressor.compress(data)]) + header = make_header(compr_type=self.compressor.TYPE, crypt_type=self.TYPE, mac_type=self.maccer.TYPE) + return b''.join([header, self.compressor.compress(data)]) def 
decrypt(self, id, data): - self.type_check(data[0]) - data = self.compressor.decompress(memoryview(data)[1:]) - if id and sha256(data).digest() != id: + offset, compressor, crypter, maccer = parser(data) + assert isinstance(self, crypter) + assert self.maccer is maccer + data = self.compressor.decompress(memoryview(data)[offset:]) + if id and self.id_hash(data) != id: raise IntegrityError('Chunk id verification failed') return data @@ -181,42 +189,45 @@ class AESKeyBase(KeyBase): Chunks are encrypted using 256bit AES in Counter Mode (CTR) - Payload layout: TYPE(1) + HMAC(32) + NONCE(8) + CIPHERTEXT + Payload layout: HEADER(4) + HMAC(32) + NONCE(8) + CIPHERTEXT To reduce payload size only 8 bytes of the 16 bytes nonce is saved in the payload, the first 8 bytes are always zeros. This does not affect security but limits the maximum repository capacity to only 295 exabytes! """ - - PAYLOAD_OVERHEAD = 1 + 32 + 8 # TYPE + HMAC + NONCE + PAYLOAD_OVERHEAD = 4 + 32 + 8 # HEADER + HMAC + NONCE def id_hash(self, data): """Return HMAC hash using the "id" HMAC key """ - return HMAC(self.id_key, data, sha256).digest() + return self.maccer(self.id_key, data).digest() def encrypt(self, data): data = self.compressor.compress(data) self.enc_cipher.reset() data = b''.join((self.enc_cipher.iv[8:], self.enc_cipher.encrypt(data))) - hmac = HMAC(self.enc_hmac_key, data, sha256).digest() - return b''.join((self.TYPE_STR, hmac, data)) + hmac = self.maccer(self.enc_hmac_key, data).digest() + header = make_header(compr_type=self.compressor.TYPE, crypt_type=self.TYPE, mac_type=self.maccer.TYPE) + return b''.join((header, hmac, data)) def decrypt(self, id, data): - self.type_check(data[0]) - hmac = memoryview(data)[1:33] - if memoryview(HMAC(self.enc_hmac_key, memoryview(data)[33:], sha256).digest()) != hmac: + offset, compressor, crypter, maccer = parser(data) + assert isinstance(self, crypter) + assert self.maccer is maccer + hmac = memoryview(data)[offset:offset+32] + if 
memoryview(self.maccer(self.enc_hmac_key, memoryview(data)[offset+32:]).digest()) != hmac: raise IntegrityError('Encryption envelope checksum mismatch') - self.dec_cipher.reset(iv=PREFIX + data[33:41]) - data = self.compressor.decompress(self.dec_cipher.decrypt(data[41:])) # should use memoryview - if id and HMAC(self.id_key, data, sha256).digest() != id: + self.dec_cipher.reset(iv=PREFIX + data[offset+32:offset+40]) + data = self.compressor.decompress(self.dec_cipher.decrypt(data[offset+40:])) # should use memoryview + if id and self.id_hash(data) != id: raise IntegrityError('Chunk id verification failed') return data def extract_nonce(self, payload): - self.type_check(payload[0]) - nonce = bytes_to_long(payload[33:41]) + offset, compressor, crypter, maccer = parser(payload) + assert isinstance(self, crypter) + nonce = bytes_to_long(payload[offset+32:offset+40]) return nonce def init_from_random_data(self, data): @@ -240,7 +251,8 @@ class PassphraseKey(AESKeyBase): @classmethod def create(cls, repository, args): compressor = compressor_creator(args) - key = cls(compressor) + maccer = maccer_creator(args, cls) + key = cls(compressor, maccer) passphrase = os.environ.get('ATTIC_PASSPHRASE') if passphrase is not None: passphrase2 = passphrase @@ -262,8 +274,8 @@ class PassphraseKey(AESKeyBase): @classmethod def detect(cls, repository, manifest_data): prompt = 'Enter passphrase for %s: ' % repository._location.orig - compressor = compressor_factory(manifest_data) - key = cls(compressor) + offset, compressor, crypter, maccer = parser(manifest_data) + key = cls(compressor, maccer) passphrase = os.environ.get('ATTIC_PASSPHRASE') if passphrase is None: passphrase = getpass(prompt) @@ -271,7 +283,7 @@ class PassphraseKey(AESKeyBase): key.init(repository, passphrase) try: key.decrypt(None, manifest_data) - num_blocks = num_aes_blocks(len(manifest_data) - 41) + num_blocks = num_aes_blocks(len(manifest_data) - offset - 40) key.init_ciphers(PREFIX + 
long_to_bytes(key.extract_nonce(manifest_data) + num_blocks)) return key except IntegrityError: @@ -288,14 +300,14 @@ class KeyfileKey(AESKeyBase): @classmethod def detect(cls, repository, manifest_data): - compressor = compressor_factory(manifest_data) - key = cls(compressor) + offset, compressor, crypter, maccer = parser(manifest_data) + key = cls(compressor, maccer) path = cls.find_key_file(repository) prompt = 'Enter passphrase for key file %s: ' % path passphrase = os.environ.get('ATTIC_PASSPHRASE', '') while not key.load(path, passphrase): passphrase = getpass(prompt) - num_blocks = num_aes_blocks(len(manifest_data) - 41) + num_blocks = num_aes_blocks(len(manifest_data) - offset - 40) key.init_ciphers(PREFIX + long_to_bytes(key.extract_nonce(manifest_data) + num_blocks)) return key @@ -398,7 +410,8 @@ class KeyfileKey(AESKeyBase): if passphrase != passphrase2: print('Passphrases do not match') compressor = compressor_creator(args) - key = cls(compressor) + maccer = maccer_creator(args, cls) + key = cls(compressor, maccer) key.repository_id = repository.id key.init_from_random_data(get_random_bytes(100)) key.init_ciphers() @@ -406,3 +419,115 @@ class KeyfileKey(AESKeyBase): print('Key file "%s" created.' % key.path) print('Keep this file safe. 
Your data will be inaccessible without it.') return key + + +# note: key 0 nicely maps to a zlib compressor with level 0 which means "no compression" +compressor_mapping = {} +for level in ZlibCompressor.LEVELS: + compressor_mapping[ZlibCompressor.TYPE + level] = \ + type('ZlibCompressorLevel%d' % level, (ZlibCompressor, ), dict(TYPE=ZlibCompressor.TYPE + level)) +for preset in LzmaCompressor.PRESETS: + compressor_mapping[LzmaCompressor.TYPE + preset] = \ + type('LzmaCompressorPreset%d' % preset, (LzmaCompressor, ), dict(TYPE=LzmaCompressor.TYPE + preset)) + + +crypter_mapping = { + KeyfileKey.TYPE: KeyfileKey, + PassphraseKey.TYPE: PassphraseKey, + PlaintextKey.TYPE: PlaintextKey, +} + + +maccer_mapping = { + # simple hashes, not MACs (but MAC-like signature): + SHA256.TYPE: SHA256, + SHA512_256.TYPE: SHA512_256, + # MACs: + HMAC_SHA256.TYPE: HMAC_SHA256, + HMAC_SHA512_256.TYPE: HMAC_SHA512_256, +} + + +def p(offset, compr_type, crypt_type, mac_type): + try: + compressor = compressor_mapping[compr_type] + crypter = crypter_mapping[crypt_type] + maccer = maccer_mapping[mac_type] + except KeyError: + raise UnsupportedPayloadError("compr_type %x crypt_type %x mac_type %x" % (compr_type, crypt_type, mac_type)) + return offset, compressor, crypter, maccer + + +def parser00(data): # legacy, hardcoded + return p(offset=1, compr_type=6, crypt_type=KeyfileKey.TYPE, mac_type=HMAC_SHA256.TYPE) + + +def parser01(data): # legacy, hardcoded + return p(offset=1, compr_type=6, crypt_type=PassphraseKey.TYPE, mac_type=HMAC_SHA256.TYPE) + + +def parser02(data): # legacy, hardcoded + return p(offset=1, compr_type=6, crypt_type=PlaintextKey.TYPE, mac_type=SHA256.TYPE) + + +def parser03(data): # new & flexible + offset = 4 + compr_type, crypt_type, mac_type = data[1:offset] + return p(offset=offset, compr_type=compr_type, crypt_type=crypt_type, mac_type=mac_type) + + +def parser(data): + parser_mapping = { + 0x00: parser00, + 0x01: parser01, + 0x02: parser02, + 0x03: parser03, + } + 
header_type = data[0] + parser_func = parser_mapping[header_type] + return parser_func(data) + + +def key_factory(repository, manifest_data): + offset, compressor, crypter, maccer = parser(manifest_data) + return crypter.detect(repository, manifest_data) + + +def make_header(compr_type, crypt_type, mac_type): + # always create new-style 0x03 headers + return bytes([0x03, compr_type, crypt_type, mac_type]) + + +def compressor_creator(args): + # args == None is used by unit tests + compression = COMPR_DEFAULT if args is None else args.compression + compressor = compressor_mapping.get(compression) + if compressor is None: + raise NotImplementedError("no compression %d" % args.compression) + return compressor + + +def key_creator(repository, args): + if args.encryption == 'keyfile': + return KeyfileKey.create(repository, args) + if args.encryption == 'passphrase': + return PassphraseKey.create(repository, args) + if args.encryption == 'none': + return PlaintextKey.create(repository, args) + raise NotImplemented("no encryption %s" % args.encryption) + + +def maccer_creator(args, key_cls): + # args == None is used by unit tests + mac = None if args is None else args.mac + if mac is None: + if key_cls is PlaintextKey: + mac = HASH_DEFAULT + elif key_cls in (KeyfileKey, PassphraseKey): + mac = MAC_DEFAULT + else: + raise NotImplementedError("unknown key class") + maccer = maccer_mapping.get(mac) + if maccer is None: + raise NotImplementedError("no mac %d" % args.mac) + return maccer diff --git a/attic/testsuite/archive.py b/attic/testsuite/archive.py index bdc7aea5..5d478dc0 100644 --- a/attic/testsuite/archive.py +++ b/attic/testsuite/archive.py @@ -1,7 +1,7 @@ import msgpack from attic.testsuite import AtticTestCase from attic.archive import CacheChunkBuffer, RobustUnpacker -from attic.key import PlaintextKey, ZlibCompressor +from attic.key import PlaintextKey, COMPR_DEFAULT class MockCache: @@ -16,10 +16,15 @@ class MockCache: class ChunkBufferTestCase(AtticTestCase): + 
class MockArgs(object): + repository = None + compression = COMPR_DEFAULT + mac = None + def test(self): data = [{b'foo': 1}, {b'bar': 2}] cache = MockCache() - key = PlaintextKey(ZlibCompressor()) + key = PlaintextKey.create(None, self.MockArgs()) chunks = CacheChunkBuffer(cache, key, None) for d in data: chunks.add(d) diff --git a/attic/testsuite/archiver.py b/attic/testsuite/archiver.py index 382fcc85..ab11016c 100644 --- a/attic/testsuite/archiver.py +++ b/attic/testsuite/archiver.py @@ -359,8 +359,8 @@ class ArchiverTestCase(ArchiverTestCaseBase): hash = sha256(data).digest() if not hash in seen: seen.add(hash) - num_blocks = num_aes_blocks(len(data) - 41) - nonce = bytes_to_long(data[33:41]) + num_blocks = num_aes_blocks(len(data) - 4 - 40) + nonce = bytes_to_long(data[4+32:4+40]) for counter in range(nonce, nonce + num_blocks): self.assert_not_in(counter, used) used.add(counter) diff --git a/attic/testsuite/key.py b/attic/testsuite/key.py index 5e191abe..e69a0c86 100644 --- a/attic/testsuite/key.py +++ b/attic/testsuite/key.py @@ -5,7 +5,7 @@ import tempfile from binascii import hexlify from attic.crypto import bytes_to_long, num_aes_blocks from attic.testsuite import AtticTestCase -from attic.key import PlaintextKey, PassphraseKey, KeyfileKey +from attic.key import PlaintextKey, PassphraseKey, KeyfileKey, COMPR_DEFAULT from attic.helpers import Location, unhexlify @@ -13,7 +13,8 @@ class KeyTestCase(AtticTestCase): class MockArgs(object): repository = Location(tempfile.mkstemp()[1]) - compression = 'zlib' + compression = COMPR_DEFAULT + mac = None keyfile2_key_file = """ ATTIC KEY 0000000000000000000000000000000000000000000000000000000000000000 @@ -46,7 +47,7 @@ class KeyTestCase(AtticTestCase): id = bytes(32) def test_plaintext(self): - key = PlaintextKey.create(None, None) + key = PlaintextKey.create(None, self.MockArgs()) data = b'foo' self.assert_equal(hexlify(key.id_hash(data)), b'2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae') 
self.assert_equal(data, key.decrypt(key.id_hash(data), key.encrypt(data))) @@ -79,7 +80,7 @@ class KeyTestCase(AtticTestCase): def test_passphrase(self): os.environ['ATTIC_PASSPHRASE'] = 'test' - key = PassphraseKey.create(self.MockRepository(), None) + key = PassphraseKey.create(self.MockRepository(), self.MockArgs()) self.assert_equal(bytes_to_long(key.enc_cipher.iv, 8), 0) self.assert_equal(hexlify(key.id_key), b'793b0717f9d8fb01c751a487e9b827897ceea62409870600013fbc6b4d8d7ca6') self.assert_equal(hexlify(key.enc_hmac_key), b'b885a05d329a086627412a6142aaeb9f6c54ab7950f996dd65587251f6bc0901') From d3f2b8d22a7a35900d99743570eef2b550f6cae8 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 2 Mar 2015 21:57:03 +0100 Subject: [PATCH 08/58] fix sha512_256 implementation --- attic/key.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/attic/key.py b/attic/key.py index c8835c19..4f3251bf 100644 --- a/attic/key.py +++ b/attic/key.py @@ -27,17 +27,19 @@ class UnsupportedPayloadError(Error): class sha512_256(object): # note: can't subclass sha512 """sha512, but digest truncated to 256bit - faster than sha256 on 64bit platforms""" + digest_size = 32 + def __init__(self, data=b''): self.h = sha512(data) - def update(self, data): - self.h.update(data) - def digest(self): - return self.h.digest()[:32] + return self.h.digest()[:self.digest_size] def hexdigest(self): - return self.h.hexdigest()[:64] + return self.h.hexdigest()[:self.digest_size * 2] + + def __getattr__(self, item): + return getattr(self.h, item) class HMAC(hmac.HMAC): From e5d0f8fc1ab484549b2d7807c586b13bea56529e Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 2 Mar 2015 23:47:30 +0100 Subject: [PATCH 09/58] more sha512_256 fixes --- attic/key.py | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/attic/key.py b/attic/key.py index 4f3251bf..92b8de60 100644 --- a/attic/key.py +++ b/attic/key.py @@ -27,19 +27,28 @@ class 
UnsupportedPayloadError(Error): class sha512_256(object): # note: can't subclass sha512 """sha512, but digest truncated to 256bit - faster than sha256 on 64bit platforms""" - digest_size = 32 + digestsize = digest_size = 32 + block_size = 64 - def __init__(self, data=b''): - self.h = sha512(data) + def __init__(self, data=None): + self.name = 'sha512-256' + self._h = sha512() + if data: + self.update(data) + + def update(self, data): + self._h.update(data) def digest(self): - return self.h.digest()[:self.digest_size] + return self._h.digest()[:self.digest_size] def hexdigest(self): - return self.h.hexdigest()[:self.digest_size * 2] + return self._h.hexdigest()[:self.digest_size * 2] - def __getattr__(self, item): - return getattr(self.h, item) + def copy(self): + new = sha512_256.__new__(sha512_256) + new._h = self._h.copy() + return new class HMAC(hmac.HMAC): @@ -218,7 +227,8 @@ class AESKeyBase(KeyBase): assert isinstance(self, crypter) assert self.maccer is maccer hmac = memoryview(data)[offset:offset+32] - if memoryview(self.maccer(self.enc_hmac_key, memoryview(data)[offset+32:]).digest()) != hmac: + computed_hmac = memoryview(self.maccer(self.enc_hmac_key, memoryview(data)[offset+32:]).digest()) + if computed_hmac != hmac: raise IntegrityError('Encryption envelope checksum mismatch') self.dec_cipher.reset(iv=PREFIX + data[offset+32:offset+40]) data = self.compressor.decompress(self.dec_cipher.decrypt(data[offset+40:])) # should use memoryview From 1e1d80c7b0891c5d2ee98dbd32b4916fb2a92cbe Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 6 Mar 2015 03:29:29 +0100 Subject: [PATCH 10/58] use AES-GCM (an AEAD single-pass mac&cipher) This has special and extremely fast HW acceleration on e.g recent Intel CPUs: AES-NI and PCLMULQDQ. Notes: a) I had to kill AES.iv method, it just did not work for aes-gcm as done by openssl. As the incremented IV (counter) can't be read back, we have to keep and manually increment it in Key.enc_iv. 
b) there is a hack in AES.compute_tag_and_encrypt to add 16B of zero to the right of the gmac (which is also 16B) because the current callers expect 32B. AES.check_tag_and_decrypt is tolerant of such a 32B tag, but will only use the left 16B and ignore the right 16B if needed. This is a bit dirty, but I didn't want to change the header layout within this changeset. c) switched from mac&encrypt to encrypt-then-mac (using aes-gcm) for the keyfile 'data' entry d) also added a test that creates the testdata needed for the constants at top of testsuite/key.py e) I kept enc_hmac_key although it is not used by the code in this changeset. But we'll need to keep supporting the old algorithms, too. --- attic/crypto.pyx | 51 +++++++++++++++++++-------- attic/key.py | 73 +++++++++++++++++++++++++-------------- attic/testsuite/crypto.py | 24 +++++++------ attic/testsuite/key.py | 41 +++++++++++++--------- 4 files changed, 123 insertions(+), 66 deletions(-) diff --git a/attic/crypto.pyx b/attic/crypto.pyx index 61dbc42d..17161975 100644 --- a/attic/crypto.pyx +++ b/attic/crypto.pyx @@ -7,6 +7,9 @@ from libc.stdlib cimport malloc, free API_VERSION = 2 +TAG_SIZE = 16 # bytes; 128 bits is the maximum allowed value. see "hack" below.
+IV_SIZE = 16 # bytes; 128 bits + cdef extern from "openssl/rand.h": int RAND_bytes(unsigned char *buf, int num) @@ -22,7 +25,7 @@ cdef extern from "openssl/evp.h": ctypedef struct ENGINE: pass const EVP_MD *EVP_sha256() - const EVP_CIPHER *EVP_aes_256_ctr() + const EVP_CIPHER *EVP_aes_256_gcm() void EVP_CIPHER_CTX_init(EVP_CIPHER_CTX *a) void EVP_CIPHER_CTX_cleanup(EVP_CIPHER_CTX *a) @@ -36,11 +39,14 @@ cdef extern from "openssl/evp.h": const unsigned char *in_, int inl) int EVP_EncryptFinal_ex(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl) int EVP_DecryptFinal_ex(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl) - + int EVP_CIPHER_CTX_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, unsigned char *ptr) int PKCS5_PBKDF2_HMAC(const char *password, int passwordlen, const unsigned char *salt, int saltlen, int iter, const EVP_MD *digest, int keylen, unsigned char *out) + int EVP_CTRL_GCM_GET_TAG + int EVP_CTRL_GCM_SET_TAG + int EVP_CTRL_GCM_SET_IVLEN import struct @@ -98,7 +104,7 @@ cdef class AES: EVP_CIPHER_CTX_init(&self.ctx) self.is_encrypt = is_encrypt # Set cipher type and mode - cipher_mode = EVP_aes_256_ctr() + cipher_mode = EVP_aes_256_gcm() if self.is_encrypt: if not EVP_EncryptInit_ex(&self.ctx, cipher_mode, NULL, NULL, NULL): raise Exception('EVP_EncryptInit_ex failed') @@ -117,6 +123,9 @@ cdef class AES: key2 = key if iv: iv2 = iv + # Set IV length (bytes) + if not EVP_CIPHER_CTX_ctrl(&self.ctx, EVP_CTRL_GCM_SET_IVLEN, IV_SIZE, NULL): + raise Exception('EVP_CIPHER_CTX_ctrl SET IVLEN failed') # Initialise key and IV if self.is_encrypt: if not EVP_EncryptInit_ex(&self.ctx, NULL, NULL, key2, iv2): @@ -125,16 +134,24 @@ cdef class AES: if not EVP_DecryptInit_ex(&self.ctx, NULL, NULL, key2, iv2): raise Exception('EVP_DecryptInit_ex failed') - @property - def iv(self): - return self.ctx.iv[:16] + def add(self, aad): + cdef int aadl = len(aad) + cdef int outl + # Zero or more calls to specify any AAD + if self.is_encrypt: + if not EVP_EncryptUpdate(&self.ctx, 
NULL, &outl, aad, aadl): + raise Exception('EVP_EncryptUpdate failed') + else: # decrypt + if not EVP_DecryptUpdate(&self.ctx, NULL, &outl, aad, aadl): + raise Exception('EVP_DecryptUpdate failed') - def encrypt(self, data): + def compute_tag_and_encrypt(self, data): cdef int inl = len(data) cdef int ctl = 0 cdef int outl = 0 - # note: modes that use padding, need up to one extra AES block (16b) + # note: modes that use padding, need up to one extra AES block (16B) cdef unsigned char *out = malloc(inl+16) + cdef unsigned char *tag = malloc(TAG_SIZE) if not out: raise MemoryError try: @@ -144,15 +161,20 @@ cdef class AES: if not EVP_EncryptFinal_ex(&self.ctx, out+ctl, &outl): raise Exception('EVP_EncryptFinal failed') ctl += outl - return out[:ctl] + # Get tag + if not EVP_CIPHER_CTX_ctrl(&self.ctx, EVP_CTRL_GCM_GET_TAG, TAG_SIZE, tag): + raise Exception('EVP_CIPHER_CTX_ctrl GET TAG failed') + # hack: caller wants 32B tags (256b), so we give back that amount + return (tag[:TAG_SIZE] + b'\x00'*16), out[:ctl] finally: + free(tag) free(out) - def decrypt(self, data): + def check_tag_and_decrypt(self, tag, data): cdef int inl = len(data) cdef int ptl = 0 cdef int outl = 0 - # note: modes that use padding, need up to one extra AES block (16b). + # note: modes that use padding, need up to one extra AES block (16B). # This is what the openssl docs say. I am not sure this is correct, # but OTOH it will not cause any harm if our buffer is a little bigger. cdef unsigned char *out = malloc(inl+16) @@ -162,10 +184,11 @@ cdef class AES: if not EVP_DecryptUpdate(&self.ctx, out, &outl, data, inl): raise Exception('EVP_DecryptUpdate failed') ptl = outl + # Set expected tag value. + if not EVP_CIPHER_CTX_ctrl(&self.ctx, EVP_CTRL_GCM_SET_TAG, TAG_SIZE, tag): + raise Exception('EVP_CIPHER_CTX_ctrl SET TAG failed') if EVP_DecryptFinal_ex(&self.ctx, out+ptl, &outl) <= 0: - # this error check is very important for modes with padding or - # authentication. 
for them, a failure here means corrupted data. - # CTR mode does not use padding nor authentication. + # a failure here means corrupted / tampered tag or data raise Exception('EVP_DecryptFinal failed') ptl += outl return out[:ptl] diff --git a/attic/key.py b/attic/key.py index 1dbd279a..680015c2 100644 --- a/attic/key.py +++ b/attic/key.py @@ -50,7 +50,7 @@ class KeyBase(object): self.TYPE_STR = bytes([self.TYPE]) def id_hash(self, data): - """Return HMAC hash using the "id" HMAC key + """Return a HASH (no id_key) or a MAC (using the "id_key" key) """ def encrypt(self, data): @@ -92,9 +92,9 @@ class PlaintextKey(KeyBase): class AESKeyBase(KeyBase): """Common base class shared by KeyfileKey and PassphraseKey - Chunks are encrypted using 256bit AES in Counter Mode (CTR) + Chunks are encrypted using 256bit AES in Galois Counter Mode (GCM) - Payload layout: TYPE(1) + HMAC(32) + NONCE(8) + CIPHERTEXT + Payload layout: TYPE(1) + TAG(32) + NONCE(8) + CIPHERTEXT To reduce payload size only 8 bytes of the 16 bytes nonce is saved in the payload, the first 8 bytes are always zeros. This does not @@ -105,45 +105,68 @@ class AESKeyBase(KeyBase): PAYLOAD_OVERHEAD = 1 + 32 + 8 # TYPE + HMAC + NONCE def id_hash(self, data): - """Return HMAC hash using the "id" HMAC key """ - return HMAC(self.id_key, data, sha256).digest() + Return GMAC using the "id_key" GMAC key + + XXX do we need a cryptographic hash function here or is a keyed hash + function like GMAC / GHASH good enough? See NIST SP 800-38D. + + IMPORTANT: in 1 repo, there should be only 1 kind of id_hash, otherwise + data hashed/maced with one id_hash might result in same ID as already + exists in the repo for other data created with another id_hash method. + somehow unlikely considering 128 or 256bits, but still. + """ + mac_cipher = AES(is_encrypt=True, key=self.id_key, iv=b'\0'*16) # XXX do we need an IV here? 
+ # GMAC = aes-gcm with all data as AAD, no data as to-be-encrypted data + mac_cipher.add(bytes(data)) + tag, _ = mac_cipher.compute_tag_and_encrypt(b'') + return tag def encrypt(self, data): data = zlib.compress(data) - self.enc_cipher.reset() - data = b''.join((self.enc_cipher.iv[8:], self.enc_cipher.encrypt(data))) - hmac = HMAC(self.enc_hmac_key, data, sha256).digest() - return b''.join((self.TYPE_STR, hmac, data)) + self.enc_cipher.reset(iv=self.enc_iv) + iv_last8 = self.enc_iv[8:] + self.enc_cipher.add(iv_last8) + tag, data = self.enc_cipher.compute_tag_and_encrypt(data) + # increase the IV (counter) value so same value is never used twice + current_iv = bytes_to_long(iv_last8) + self.enc_iv = PREFIX + long_to_bytes(current_iv + num_aes_blocks(len(data))) + return b''.join((self.TYPE_STR, tag, iv_last8, data)) def decrypt(self, id, data): if data[0] != self.TYPE: raise IntegrityError('Invalid encryption envelope') - hmac = memoryview(data)[1:33] - if memoryview(HMAC(self.enc_hmac_key, memoryview(data)[33:], sha256).digest()) != hmac: + iv_last8 = data[1+32:1+40] + iv = PREFIX + iv_last8 + self.dec_cipher.reset(iv=iv) + self.dec_cipher.add(iv_last8) + tag, data = data[1:1+32], data[1+40:] + try: + data = self.dec_cipher.check_tag_and_decrypt(tag, data) + except Exception: raise IntegrityError('Encryption envelope checksum mismatch') - self.dec_cipher.reset(iv=PREFIX + data[33:41]) - data = zlib.decompress(self.dec_cipher.decrypt(data[41:])) # should use memoryview - if id and HMAC(self.id_key, data, sha256).digest() != id: + data = zlib.decompress(data) + if id and self.id_hash(data) != id: raise IntegrityError('Chunk id verification failed') return data def extract_nonce(self, payload): if payload[0] != self.TYPE: - raise IntegrityError('Invalid encryption envelope') + raise IntegrityError('Invalid encryption envelope') nonce = bytes_to_long(payload[33:41]) return nonce def init_from_random_data(self, data): self.enc_key = data[0:32] - self.enc_hmac_key = 
data[32:64] + self.enc_hmac_key = data[32:64] # XXX enc_hmac_key not used for AES-GCM self.id_key = data[64:96] self.chunk_seed = bytes_to_int(data[96:100]) # Convert to signed int32 if self.chunk_seed & 0x80000000: self.chunk_seed = self.chunk_seed - 0xffffffff - 1 - def init_ciphers(self, enc_iv=b''): + def init_ciphers(self, enc_iv=PREFIX * 2): # default IV = 16B zero + self.enc_iv = enc_iv self.enc_cipher = AES(is_encrypt=True, key=self.enc_key, iv=enc_iv) self.dec_cipher = AES(is_encrypt=False, key=self.enc_key) @@ -242,25 +265,25 @@ class KeyfileKey(AESKeyBase): def decrypt_key_file(self, data, passphrase): d = msgpack.unpackb(data) assert d[b'version'] == 1 - assert d[b'algorithm'] == b'sha256' + assert d[b'algorithm'] == b'gmac' key = pbkdf2_sha256(passphrase.encode('utf-8'), d[b'salt'], d[b'iterations'], 32) - data = AES(is_encrypt=False, key=key).decrypt(d[b'data']) - if HMAC(key, data, sha256).digest() != d[b'hash']: + try: + data = AES(is_encrypt=False, key=key, iv=b'\0'*16).check_tag_and_decrypt(d[b'hash'], d[b'data']) + return data + except Exception: return None - return data def encrypt_key_file(self, data, passphrase): salt = get_random_bytes(32) iterations = 100000 key = pbkdf2_sha256(passphrase.encode('utf-8'), salt, iterations, 32) - hash = HMAC(key, data, sha256).digest() - cdata = AES(is_encrypt=True, key=key).encrypt(data) + tag, cdata = AES(is_encrypt=True, key=key, iv=b'\0'*16).compute_tag_and_encrypt(data) d = { 'version': 1, 'salt': salt, 'iterations': iterations, - 'algorithm': 'sha256', - 'hash': hash, + 'algorithm': 'gmac', + 'hash': tag, 'data': cdata, } return msgpack.packb(d) diff --git a/attic/testsuite/crypto.py b/attic/testsuite/crypto.py index 304ef97c..f4b9a9cb 100644 --- a/attic/testsuite/crypto.py +++ b/attic/testsuite/crypto.py @@ -27,18 +27,20 @@ class CryptoTestCase(AtticTestCase): self.assert_equal(len(bytes2), 10) self.assert_not_equal(bytes, bytes2) - def test_aes(self): + def test_aes_gcm(self): key = b'X' * 32 + iv = 
b'A' * 16 data = b'foo' * 10 # encrypt - aes = AES(is_encrypt=True, key=key) - self.assert_equal(bytes_to_long(aes.iv, 8), 0) - cdata = aes.encrypt(data) - self.assert_equal(hexlify(cdata), b'c6efb702de12498f34a2c2bbc8149e759996d08bf6dc5c610aefc0c3a466') - self.assert_equal(bytes_to_long(aes.iv, 8), 2) - # decrypt - aes = AES(is_encrypt=False, key=key) - self.assert_equal(bytes_to_long(aes.iv, 8), 0) - pdata = aes.decrypt(cdata) + aes = AES(is_encrypt=True, key=key, iv=iv) + tag, cdata = aes.compute_tag_and_encrypt(data) + self.assert_equal(hexlify(tag), b'c98aa10eb6b7031bcc2160878d9438fb00000000000000000000000000000000') + self.assert_equal(hexlify(cdata), b'841bcce405df769d22ee9f7f012edf5dc7fb2594d924c7400ffd050f2741') + # decrypt (correct tag/cdata) + aes = AES(is_encrypt=False, key=key, iv=iv) + pdata = aes.check_tag_and_decrypt(tag, cdata) self.assert_equal(data, pdata) - self.assert_equal(bytes_to_long(aes.iv, 8), 2) + # decrypt (incorrect tag/cdata) + aes = AES(is_encrypt=False, key=key, iv=iv) + cdata = b'x' + cdata[1:] # corrupt cdata + self.assertRaises(Exception, aes.check_tag_and_decrypt, tag, cdata) diff --git a/attic/testsuite/key.py b/attic/testsuite/key.py index 543d1be3..de31f272 100644 --- a/attic/testsuite/key.py +++ b/attic/testsuite/key.py @@ -15,20 +15,20 @@ class KeyTestCase(AtticTestCase): repository = Location(tempfile.mkstemp()[1]) keyfile2_key_file = """ - ATTIC KEY 0000000000000000000000000000000000000000000000000000000000000000 - hqppdGVyYXRpb25zzgABhqCkaGFzaNoAIMyonNI+7Cjv0qHi0AOBM6bLGxACJhfgzVD2oq - bIS9SFqWFsZ29yaXRobaZzaGEyNTakc2FsdNoAINNK5qqJc1JWSUjACwFEWGTdM7Nd0a5l - 1uBGPEb+9XM9p3ZlcnNpb24BpGRhdGHaANAYDT5yfPpU099oBJwMomsxouKyx/OG4QIXK2 - hQCG2L2L/9PUu4WIuKvGrsXoP7syemujNfcZws5jLp2UPva4PkQhQsrF1RYDEMLh2eF9Ol - rwtkThq1tnh7KjWMG9Ijt7/aoQtq0zDYP/xaFF8XXSJxiyP5zjH5+spB6RL0oQHvbsliSh - /cXJq7jrqmrJ1phd6dg4SHAM/i+hubadZoS6m25OQzYAW09wZD/phG8OVa698Z5ed3HTaT - SmrtgJL3EoOKgUI9d6BLE4dJdBqntifo""".strip() +ATTIC KEY 
0000000000000000000000000000000000000000000000000000000000000000 +hqppdGVyYXRpb25zzgABhqCkc2FsdNoAICiRWfijWqIuvr+70VzOsUS4Y6NM45FWm6LgCu +2GyalGqWFsZ29yaXRobaRnbWFjpGhhc2jaACDgCK7u30Pi+Du1qHRyWBupAAAAAAAAAAAA +AAAAAAAAAKd2ZXJzaW9uAaRkYXRh2gDQrlCtq2mzdmkuhwIoko5+amxYqnlfNHHZxRFiX9 +F8AliP7H6S0j9uHyrBKRDWtj7VGYWVW8COy/FncLRgRhspB59rH3y/GS6pfeEw7RWUPd32 +eOcB6v8q+IHUvGttyFRcN6PxSFHBhOKN0jqStP0UqXLv+d9rGWi6X/HNZGu9WPkqs/g0G9 +xnf48i9pOy19aQo3HV//ubf+VYWmc1J8zjCS2Og0JkMtxbqM6j4mShPjkURZZBXSJGtORV +5IzNAzixJWmr8LR12TmFGVb0U9P79A==""".strip() keyfile2_cdata = unhexlify(re.sub('\W', '', """ - 0055f161493fcfc16276e8c31493c4641e1eb19a79d0326fad0291e5a9c98e5933 - 00000000000003e8d21eaf9b86c297a8cd56432e1915bb + 004078370be366ac3ad9d147992be8ebee000000000000000000000000000000000000000000000000 + b94bfb5d0a63b0c47cf74e2d0585aa """)) - keyfile2_id = unhexlify('c3fbf14bc001ebcc3cd86e696c13482ed071740927cd7cbe1b01b4bfcee49314') + keyfile2_id = unhexlify('45f309b4ef353c467d16a19039b87e5400000000000000000000000000000000') def setUp(self): self.tmppath = tempfile.mkdtemp() @@ -44,6 +44,15 @@ class KeyTestCase(AtticTestCase): _location = _Location() id = bytes(32) + def _test_make_testdata(self): + # modify tearDown to not kill the key file first, before using this + os.environ['ATTIC_PASSPHRASE'] = 'passphrase' + key = KeyfileKey.create(self.MockRepository(), self.MockArgs()) + print("keyfile2_key_file: find the it in the filesystem, see location in test log output") + print("keyfile2_cdata:", hexlify(key.encrypt(b'payload'))) + print("keyfile2_id:", hexlify(key.id_hash(b'payload'))) + assert False + def test_plaintext(self): key = PlaintextKey.create(None, None) data = b'foo' @@ -53,7 +62,7 @@ class KeyTestCase(AtticTestCase): def test_keyfile(self): os.environ['ATTIC_PASSPHRASE'] = 'test' key = KeyfileKey.create(self.MockRepository(), self.MockArgs()) - self.assert_equal(bytes_to_long(key.enc_cipher.iv, 8), 0) + self.assert_equal(bytes_to_long(key.enc_iv, 8), 0) manifest = 
key.encrypt(b'XXX') self.assert_equal(key.extract_nonce(manifest), 0) manifest2 = key.encrypt(b'XXX') @@ -62,7 +71,7 @@ class KeyTestCase(AtticTestCase): self.assert_equal(key.extract_nonce(manifest2), 1) iv = key.extract_nonce(manifest) key2 = KeyfileKey.detect(self.MockRepository(), manifest) - self.assert_equal(bytes_to_long(key2.enc_cipher.iv, 8), iv + num_aes_blocks(len(manifest) - KeyfileKey.PAYLOAD_OVERHEAD)) + self.assert_equal(bytes_to_long(key2.enc_iv, 8), iv + num_aes_blocks(len(manifest) - KeyfileKey.PAYLOAD_OVERHEAD)) # Key data sanity check self.assert_equal(len(set([key2.id_key, key2.enc_key, key2.enc_hmac_key])), 3) self.assert_equal(key2.chunk_seed == 0, False) @@ -79,7 +88,7 @@ class KeyTestCase(AtticTestCase): def test_passphrase(self): os.environ['ATTIC_PASSPHRASE'] = 'test' key = PassphraseKey.create(self.MockRepository(), None) - self.assert_equal(bytes_to_long(key.enc_cipher.iv, 8), 0) + self.assert_equal(bytes_to_long(key.enc_iv, 8), 0) self.assert_equal(hexlify(key.id_key), b'793b0717f9d8fb01c751a487e9b827897ceea62409870600013fbc6b4d8d7ca6') self.assert_equal(hexlify(key.enc_hmac_key), b'b885a05d329a086627412a6142aaeb9f6c54ab7950f996dd65587251f6bc0901') self.assert_equal(hexlify(key.enc_key), b'2ff3654c6daf7381dbbe718d2b20b4f1ea1e34caa6cc65f6bb3ac376b93fed2a') @@ -92,11 +101,11 @@ class KeyTestCase(AtticTestCase): self.assert_equal(key.extract_nonce(manifest2), 1) iv = key.extract_nonce(manifest) key2 = PassphraseKey.detect(self.MockRepository(), manifest) - self.assert_equal(bytes_to_long(key2.enc_cipher.iv, 8), iv + num_aes_blocks(len(manifest) - PassphraseKey.PAYLOAD_OVERHEAD)) + self.assert_equal(bytes_to_long(key2.enc_iv, 8), iv + num_aes_blocks(len(manifest) - PassphraseKey.PAYLOAD_OVERHEAD)) self.assert_equal(key.id_key, key2.id_key) self.assert_equal(key.enc_hmac_key, key2.enc_hmac_key) self.assert_equal(key.enc_key, key2.enc_key) self.assert_equal(key.chunk_seed, key2.chunk_seed) data = b'foo' - 
self.assert_equal(hexlify(key.id_hash(data)), b'818217cf07d37efad3860766dcdf1d21e401650fed2d76ed1d797d3aae925990') + self.assert_equal(hexlify(key.id_hash(data)), b'a409d69859b8a07625f066e42cde050100000000000000000000000000000000') self.assert_equal(data, key2.decrypt(key2.id_hash(data), key.encrypt(data))) From 9f8b7b4c4a320686bfe2122b5f3b0c8771edc3f0 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 7 Mar 2015 15:01:52 +0100 Subject: [PATCH 11/58] clarify phrasing in comment a MAC can be seen as digital signature (but that was not meant in the comment, but the parameters of __init__ method, it's "signature") --- attic/key.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/attic/key.py b/attic/key.py index 92b8de60..0da0f339 100644 --- a/attic/key.py +++ b/attic/key.py @@ -451,7 +451,7 @@ crypter_mapping = { maccer_mapping = { - # simple hashes, not MACs (but MAC-like signature): + # simple hashes, not MACs (but MAC-like class __init__ method signature): SHA256.TYPE: SHA256, SHA512_256.TYPE: SHA512_256, # MACs: From abaf0d34bbc9e5deaf128d467936d75df1f0417d Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 7 Mar 2015 15:53:51 +0100 Subject: [PATCH 12/58] use Meta namedtuple to pass around header metadata, cleanup --- attic/key.py | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/attic/key.py b/attic/key.py index 0da0f339..e6dfaaf0 100644 --- a/attic/key.py +++ b/attic/key.py @@ -460,32 +460,46 @@ maccer_mapping = { } -def p(offset, compr_type, crypt_type, mac_type): +from collections import namedtuple +Meta = namedtuple('Meta', 'compr_type, crypt_type, mac_type') + + +def get_implementations(meta): try: - compressor = compressor_mapping[compr_type] - crypter = crypter_mapping[crypt_type] - maccer = maccer_mapping[mac_type] + compressor = compressor_mapping[meta.compr_type] + crypter = crypter_mapping[meta.crypt_type] + maccer = maccer_mapping[meta.mac_type] except KeyError: - 
raise UnsupportedPayloadError("compr_type %x crypt_type %x mac_type %x" % (compr_type, crypt_type, mac_type)) - return offset, compressor, crypter, maccer + raise UnsupportedPayloadError("compr_type %x crypt_type %x mac_type %x" % ( + meta.compr_type, meta.crypt_type, meta.mac_type)) + return compressor, crypter, maccer def parser00(data): # legacy, hardcoded - return p(offset=1, compr_type=6, crypt_type=KeyfileKey.TYPE, mac_type=HMAC_SHA256.TYPE) - + offset = 1 + meta = Meta(compr_type=6, crypt_type=KeyfileKey.TYPE, mac_type=HMAC_SHA256.TYPE) + compressor, crypter, maccer = get_implementations(meta) + return offset, compressor, crypter, maccer def parser01(data): # legacy, hardcoded - return p(offset=1, compr_type=6, crypt_type=PassphraseKey.TYPE, mac_type=HMAC_SHA256.TYPE) - + offset = 1 + meta = Meta(compr_type=6, crypt_type=PassphraseKey.TYPE, mac_type=HMAC_SHA256.TYPE) + compressor, crypter, maccer = get_implementations(meta) + return offset, compressor, crypter, maccer def parser02(data): # legacy, hardcoded - return p(offset=1, compr_type=6, crypt_type=PlaintextKey.TYPE, mac_type=SHA256.TYPE) + offset = 1 + meta = Meta(compr_type=6, crypt_type=PlaintextKey.TYPE, mac_type=SHA256.TYPE) + compressor, crypter, maccer = get_implementations(meta) + return offset, compressor, crypter, maccer def parser03(data): # new & flexible offset = 4 compr_type, crypt_type, mac_type = data[1:offset] - return p(offset=offset, compr_type=compr_type, crypt_type=crypt_type, mac_type=mac_type) + meta = Meta(compr_type=compr_type, crypt_type=crypt_type, mac_type=mac_type) + compressor, crypter, maccer = get_implementations(meta) + return offset, compressor, crypter, maccer def parser(data): From 7cecee0b290aa0a232197265f68299d97b0a9994 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 7 Mar 2015 19:11:35 +0100 Subject: [PATCH 13/58] use parser to completely analyze given format into meta and data, use generator to create format from meta and data --- attic/key.py | 122 
++++++++++++++++++++++++++++++++------------------- 1 file changed, 78 insertions(+), 44 deletions(-) diff --git a/attic/key.py b/attic/key.py index e6dfaaf0..31a0e1ec 100644 --- a/attic/key.py +++ b/attic/key.py @@ -3,6 +3,7 @@ from getpass import getpass import os import msgpack import textwrap +from collections import namedtuple import hmac from hashlib import sha256, sha512 import zlib @@ -143,6 +144,9 @@ class LzmaCompressor(object): # uses 10..19 in the mapping COMPR_DEFAULT = ZlibCompressor.TYPE + 6 # zlib level 6 +Meta = namedtuple('Meta', 'compr_type, crypt_type, mac_type, hmac, iv, stored_iv') + + class KeyBase(object): TYPE = 0x00 # override in derived classes @@ -175,21 +179,23 @@ class PlaintextKey(KeyBase): @classmethod def detect(cls, repository, manifest_data): - offset, compressor, crypter, maccer = parser(manifest_data) + meta, data, compressor, crypter, maccer = parser(manifest_data) return cls(compressor, maccer) def id_hash(self, data): return self.maccer(None, data).digest() def encrypt(self, data): - header = make_header(compr_type=self.compressor.TYPE, crypt_type=self.TYPE, mac_type=self.maccer.TYPE) - return b''.join([header, self.compressor.compress(data)]) + meta = Meta(compr_type=self.compressor.TYPE, crypt_type=self.TYPE, mac_type=self.maccer.TYPE, + hmac=None, iv=None, stored_iv=None) + data = self.compressor.compress(data) + return generate(meta, data) def decrypt(self, id, data): - offset, compressor, crypter, maccer = parser(data) + meta, data, compressor, crypter, maccer = parser(data) assert isinstance(self, crypter) assert self.maccer is maccer - data = self.compressor.decompress(memoryview(data)[offset:]) + data = self.compressor.decompress(data) if id and self.id_hash(data) != id: raise IntegrityError('Chunk id verification failed') return data @@ -207,7 +213,7 @@ class AESKeyBase(KeyBase): affect security but limits the maximum repository capacity to only 295 exabytes! 
""" - PAYLOAD_OVERHEAD = 4 + 32 + 8 # HEADER + HMAC + NONCE + PAYLOAD_OVERHEAD = 4 + 32 + 8 # HEADER + HMAC + NONCE, TODO: get rid of this def id_hash(self, data): """Return HMAC hash using the "id" HMAC key @@ -217,29 +223,31 @@ class AESKeyBase(KeyBase): def encrypt(self, data): data = self.compressor.compress(data) self.enc_cipher.reset() - data = b''.join((self.enc_cipher.iv[8:], self.enc_cipher.encrypt(data))) - hmac = self.maccer(self.enc_hmac_key, data).digest() - header = make_header(compr_type=self.compressor.TYPE, crypt_type=self.TYPE, mac_type=self.maccer.TYPE) - return b''.join((header, hmac, data)) + stored_iv = self.enc_cipher.iv[8:] + iv = PREFIX + stored_iv + data = self.enc_cipher.encrypt(data) + hmac = self.maccer(self.enc_hmac_key, stored_iv + data).digest() + meta = Meta(compr_type=self.compressor.TYPE, crypt_type=self.TYPE, mac_type=self.maccer.TYPE, + hmac=hmac, iv=iv, stored_iv=stored_iv) + return generate(meta, data) def decrypt(self, id, data): - offset, compressor, crypter, maccer = parser(data) + meta, data, compressor, crypter, maccer = parser(data) assert isinstance(self, crypter) assert self.maccer is maccer - hmac = memoryview(data)[offset:offset+32] - computed_hmac = memoryview(self.maccer(self.enc_hmac_key, memoryview(data)[offset+32:]).digest()) - if computed_hmac != hmac: + computed_hmac = self.maccer(self.enc_hmac_key, meta.stored_iv + data).digest() + if computed_hmac != meta.hmac: raise IntegrityError('Encryption envelope checksum mismatch') - self.dec_cipher.reset(iv=PREFIX + data[offset+32:offset+40]) - data = self.compressor.decompress(self.dec_cipher.decrypt(data[offset+40:])) # should use memoryview + self.dec_cipher.reset(iv=meta.iv) + data = self.compressor.decompress(self.dec_cipher.decrypt(data)) if id and self.id_hash(data) != id: raise IntegrityError('Chunk id verification failed') return data def extract_nonce(self, payload): - offset, compressor, crypter, maccer = parser(payload) + meta, data, compressor, crypter, 
maccer = parser(payload) assert isinstance(self, crypter) - nonce = bytes_to_long(payload[offset+32:offset+40]) + nonce = bytes_to_long(meta.stored_iv) return nonce def init_from_random_data(self, data): @@ -286,7 +294,7 @@ class PassphraseKey(AESKeyBase): @classmethod def detect(cls, repository, manifest_data): prompt = 'Enter passphrase for %s: ' % repository._location.orig - offset, compressor, crypter, maccer = parser(manifest_data) + meta, data, compressor, crypter, maccer = parser(manifest_data) key = cls(compressor, maccer) passphrase = os.environ.get('ATTIC_PASSPHRASE') if passphrase is None: @@ -295,7 +303,7 @@ class PassphraseKey(AESKeyBase): key.init(repository, passphrase) try: key.decrypt(None, manifest_data) - num_blocks = num_aes_blocks(len(manifest_data) - offset - 40) + num_blocks = num_aes_blocks(len(data)) key.init_ciphers(PREFIX + long_to_bytes(key.extract_nonce(manifest_data) + num_blocks)) return key except IntegrityError: @@ -312,14 +320,14 @@ class KeyfileKey(AESKeyBase): @classmethod def detect(cls, repository, manifest_data): - offset, compressor, crypter, maccer = parser(manifest_data) + meta, data, compressor, crypter, maccer = parser(manifest_data) key = cls(compressor, maccer) path = cls.find_key_file(repository) prompt = 'Enter passphrase for key file %s: ' % path passphrase = os.environ.get('ATTIC_PASSPHRASE', '') while not key.load(path, passphrase): passphrase = getpass(prompt) - num_blocks = num_aes_blocks(len(manifest_data) - offset - 40) + num_blocks = num_aes_blocks(len(data)) key.init_ciphers(PREFIX + long_to_bytes(key.extract_nonce(manifest_data) + num_blocks)) return key @@ -460,10 +468,6 @@ maccer_mapping = { } -from collections import namedtuple -Meta = namedtuple('Meta', 'compr_type, crypt_type, mac_type') - - def get_implementations(meta): try: compressor = compressor_mapping[meta.compr_type] @@ -475,31 +479,55 @@ def get_implementations(meta): return compressor, crypter, maccer -def parser00(data): # legacy, hardcoded 
+def parser00(all_data): # legacy, hardcoded offset = 1 - meta = Meta(compr_type=6, crypt_type=KeyfileKey.TYPE, mac_type=HMAC_SHA256.TYPE) + hmac = all_data[offset:offset+32] + stored_iv = all_data[offset+32:offset+40] + iv = PREFIX + stored_iv + data = all_data[offset+40:] + meta = Meta(compr_type=6, crypt_type=KeyfileKey.TYPE, mac_type=HMAC_SHA256.TYPE, + hmac=hmac, iv=iv, stored_iv=stored_iv) compressor, crypter, maccer = get_implementations(meta) - return offset, compressor, crypter, maccer + return meta, data, compressor, crypter, maccer -def parser01(data): # legacy, hardcoded +def parser01(all_data): # legacy, hardcoded offset = 1 - meta = Meta(compr_type=6, crypt_type=PassphraseKey.TYPE, mac_type=HMAC_SHA256.TYPE) + hmac = all_data[offset:offset+32] + stored_iv = all_data[offset+32:offset+40] + iv = PREFIX + stored_iv + data = all_data[offset+40:] + meta = Meta(compr_type=6, crypt_type=PassphraseKey.TYPE, mac_type=HMAC_SHA256.TYPE, + hmac=hmac, iv=iv, stored_iv=stored_iv) compressor, crypter, maccer = get_implementations(meta) - return offset, compressor, crypter, maccer + return meta, data, compressor, crypter, maccer -def parser02(data): # legacy, hardcoded +def parser02(all_data): # legacy, hardcoded offset = 1 - meta = Meta(compr_type=6, crypt_type=PlaintextKey.TYPE, mac_type=SHA256.TYPE) + hmac = None + iv = stored_iv = None + data = all_data[offset:] + meta = Meta(compr_type=6, crypt_type=PlaintextKey.TYPE, mac_type=SHA256.TYPE, + hmac=hmac, iv=iv, stored_iv=stored_iv) compressor, crypter, maccer = get_implementations(meta) - return offset, compressor, crypter, maccer + return meta, data, compressor, crypter, maccer -def parser03(data): # new & flexible +def parser03(all_data): # new & flexible offset = 4 - compr_type, crypt_type, mac_type = data[1:offset] - meta = Meta(compr_type=compr_type, crypt_type=crypt_type, mac_type=mac_type) + compr_type, crypt_type, mac_type = all_data[1:offset] + if crypt_type == PlaintextKey.TYPE: + hmac = None + iv = 
stored_iv = None + data = all_data[offset:] + else: + hmac = all_data[offset:offset+32] + stored_iv = all_data[offset+32:offset+40] + iv = PREFIX + stored_iv + data = all_data[offset+40:] + meta = Meta(compr_type=compr_type, crypt_type=crypt_type, mac_type=mac_type, + hmac=hmac, iv=iv, stored_iv=stored_iv) compressor, crypter, maccer = get_implementations(meta) - return offset, compressor, crypter, maccer + return meta, data, compressor, crypter, maccer def parser(data): @@ -515,14 +543,20 @@ def parser(data): def key_factory(repository, manifest_data): - offset, compressor, crypter, maccer = parser(manifest_data) + meta, data, compressor, crypter, maccer = parser(manifest_data) return crypter.detect(repository, manifest_data) -def make_header(compr_type, crypt_type, mac_type): - # always create new-style 0x03 headers - return bytes([0x03, compr_type, crypt_type, mac_type]) - +def generate(meta, data): + # always create new-style 0x03 format + start = bytes([0x03, meta.compr_type, meta.crypt_type, meta.mac_type]) + if meta.crypt_type == PlaintextKey.TYPE: + result = start + data + else: + assert len(meta.hmac) == 32 + assert len(meta.stored_iv) == 8 + result = start + meta.hmac + meta.stored_iv + data + return result def compressor_creator(args): # args == None is used by unit tests From 75652d63904fec1796b685f8355f0805b821b7e5 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 7 Mar 2015 19:26:44 +0100 Subject: [PATCH 14/58] deduplicate legacy parser code --- attic/key.py | 43 ++++++++++++++++++------------------------- 1 file changed, 18 insertions(+), 25 deletions(-) diff --git a/attic/key.py b/attic/key.py index 31a0e1ec..543c60e1 100644 --- a/attic/key.py +++ b/attic/key.py @@ -479,37 +479,30 @@ def get_implementations(meta): return compressor, crypter, maccer -def parser00(all_data): # legacy, hardcoded +def legacy_parser(all_data, crypt_type): # all rather hardcoded offset = 1 - hmac = all_data[offset:offset+32] - stored_iv = 
all_data[offset+32:offset+40] - iv = PREFIX + stored_iv - data = all_data[offset+40:] - meta = Meta(compr_type=6, crypt_type=KeyfileKey.TYPE, mac_type=HMAC_SHA256.TYPE, + if crypt_type == PlaintextKey.TYPE: + hmac = None + iv = stored_iv = None + data = all_data[offset:] + else: + hmac = all_data[offset:offset+32] + stored_iv = all_data[offset+32:offset+40] + iv = PREFIX + stored_iv + data = all_data[offset+40:] + meta = Meta(compr_type=6, crypt_type=crypt_type, mac_type=HMAC_SHA256.TYPE, hmac=hmac, iv=iv, stored_iv=stored_iv) compressor, crypter, maccer = get_implementations(meta) return meta, data, compressor, crypter, maccer -def parser01(all_data): # legacy, hardcoded - offset = 1 - hmac = all_data[offset:offset+32] - stored_iv = all_data[offset+32:offset+40] - iv = PREFIX + stored_iv - data = all_data[offset+40:] - meta = Meta(compr_type=6, crypt_type=PassphraseKey.TYPE, mac_type=HMAC_SHA256.TYPE, - hmac=hmac, iv=iv, stored_iv=stored_iv) - compressor, crypter, maccer = get_implementations(meta) - return meta, data, compressor, crypter, maccer +def parser00(all_data): + return legacy_parser(all_data, KeyfileKey.TYPE) -def parser02(all_data): # legacy, hardcoded - offset = 1 - hmac = None - iv = stored_iv = None - data = all_data[offset:] - meta = Meta(compr_type=6, crypt_type=PlaintextKey.TYPE, mac_type=SHA256.TYPE, - hmac=hmac, iv=iv, stored_iv=stored_iv) - compressor, crypter, maccer = get_implementations(meta) - return meta, data, compressor, crypter, maccer +def parser01(all_data): + return legacy_parser(all_data, PassphraseKey.TYPE) + +def parser02(all_data): + return legacy_parser(all_data, PlaintextKey.TYPE) def parser03(all_data): # new & flexible From 3aabaa75f7296beaddd76195b8d172540c30147e Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 7 Mar 2015 19:58:38 +0100 Subject: [PATCH 15/58] get rid of PAYLOAD_OVERHEAD it was nice for fixed-sized overheads, but the next changeset makes them variable size, so a correct guess in the unit tests works 
better. --- attic/key.py | 2 -- attic/testsuite/key.py | 6 ++++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/attic/key.py b/attic/key.py index 543c60e1..45a7fef1 100644 --- a/attic/key.py +++ b/attic/key.py @@ -213,8 +213,6 @@ class AESKeyBase(KeyBase): affect security but limits the maximum repository capacity to only 295 exabytes! """ - PAYLOAD_OVERHEAD = 4 + 32 + 8 # HEADER + HMAC + NONCE, TODO: get rid of this - def id_hash(self, data): """Return HMAC hash using the "id" HMAC key """ diff --git a/attic/testsuite/key.py b/attic/testsuite/key.py index e69a0c86..7dbd09bd 100644 --- a/attic/testsuite/key.py +++ b/attic/testsuite/key.py @@ -64,7 +64,8 @@ class KeyTestCase(AtticTestCase): self.assert_equal(key.extract_nonce(manifest2), 1) iv = key.extract_nonce(manifest) key2 = KeyfileKey.detect(self.MockRepository(), manifest) - self.assert_equal(bytes_to_long(key2.enc_cipher.iv, 8), iv + num_aes_blocks(len(manifest) - KeyfileKey.PAYLOAD_OVERHEAD)) + # we just assume that the payload fits into 1 AES block (which is given for b'XXX'). + self.assert_equal(bytes_to_long(key2.enc_cipher.iv, 8), iv + 1) # Key data sanity check self.assert_equal(len(set([key2.id_key, key2.enc_key, key2.enc_hmac_key])), 3) self.assert_equal(key2.chunk_seed == 0, False) @@ -94,7 +95,8 @@ class KeyTestCase(AtticTestCase): self.assert_equal(key.extract_nonce(manifest2), 1) iv = key.extract_nonce(manifest) key2 = PassphraseKey.detect(self.MockRepository(), manifest) - self.assert_equal(bytes_to_long(key2.enc_cipher.iv, 8), iv + num_aes_blocks(len(manifest) - PassphraseKey.PAYLOAD_OVERHEAD)) + # we just assume that the payload fits into 1 AES block (which is given for b'XXX'). 
+ self.assert_equal(bytes_to_long(key2.enc_cipher.iv, 8), iv + 1) self.assert_equal(key.id_key, key2.id_key) self.assert_equal(key.enc_hmac_key, key2.enc_hmac_key) self.assert_equal(key.enc_key, key2.enc_key) From fcea641e26bbaee686620808ee2bf6cd17331551 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 7 Mar 2015 21:02:09 +0100 Subject: [PATCH 16/58] reimplement new 0x03 format using msgpack this is much easier to maintain (and change, if needed) than all those hardcoded offsets. note: when using packb to store a namedtuple's data, just the tuple's elements get stored, in order (not the names). thus, the overhead is rather small. but we can just recreate the namedtuple from the tuple returned by unpackb. namedtuples are very efficient and prettier to deal with than tuples. alternatively, a dictionary could be used, but packb would create more overhead for it as key names and values would be stored. --- attic/key.py | 25 ++++--------------------- attic/testsuite/archiver.py | 6 ++++-- 2 files changed, 8 insertions(+), 23 deletions(-) diff --git a/attic/key.py b/attic/key.py index 45a7fef1..3f1b4170 100644 --- a/attic/key.py +++ b/attic/key.py @@ -504,19 +504,8 @@ def parser02(all_data): def parser03(all_data): # new & flexible - offset = 4 - compr_type, crypt_type, mac_type = all_data[1:offset] - if crypt_type == PlaintextKey.TYPE: - hmac = None - iv = stored_iv = None - data = all_data[offset:] - else: - hmac = all_data[offset:offset+32] - stored_iv = all_data[offset+32:offset+40] - iv = PREFIX + stored_iv - data = all_data[offset+40:] - meta = Meta(compr_type=compr_type, crypt_type=crypt_type, mac_type=mac_type, - hmac=hmac, iv=iv, stored_iv=stored_iv) + meta_tuple, data = msgpack.unpackb(all_data[1:]) + meta = Meta(*meta_tuple) compressor, crypter, maccer = get_implementations(meta) return meta, data, compressor, crypter, maccer @@ -540,14 +529,8 @@ def key_factory(repository, manifest_data): def generate(meta, data): # always create new-style 0x03 
format - start = bytes([0x03, meta.compr_type, meta.crypt_type, meta.mac_type]) - if meta.crypt_type == PlaintextKey.TYPE: - result = start + data - else: - assert len(meta.hmac) == 32 - assert len(meta.stored_iv) == 8 - result = start + meta.hmac + meta.stored_iv + data - return result + return b'\x03' + msgpack.packb((meta, data)) + def compressor_creator(args): # args == None is used by unit tests diff --git a/attic/testsuite/archiver.py b/attic/testsuite/archiver.py index ab11016c..922a3546 100644 --- a/attic/testsuite/archiver.py +++ b/attic/testsuite/archiver.py @@ -13,6 +13,7 @@ from attic.archive import Archive, ChunkBuffer from attic.archiver import Archiver from attic.crypto import bytes_to_long, num_aes_blocks from attic.helpers import Manifest +from attic.key import parser from attic.remote import RemoteRepository, PathNotAllowed from attic.repository import Repository from attic.testsuite import AtticTestCase @@ -359,8 +360,9 @@ class ArchiverTestCase(ArchiverTestCaseBase): hash = sha256(data).digest() if not hash in seen: seen.add(hash) - num_blocks = num_aes_blocks(len(data) - 4 - 40) - nonce = bytes_to_long(data[4+32:4+40]) + meta, data, _, _, _ = parser(data) + num_blocks = num_aes_blocks(len(data)) + nonce = bytes_to_long(meta.stored_iv) for counter in range(nonce, nonce + num_blocks): self.assert_not_in(counter, used) used.add(counter) From 9241fa6336eadf01d4bb9f9173a1c2ea22c3dfb5 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 7 Mar 2015 21:20:06 +0100 Subject: [PATCH 17/58] do not store "iv" into on-disk metadata, just stored_iv (lower 64bits) --- attic/key.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/attic/key.py b/attic/key.py index 3f1b4170..247e2451 100644 --- a/attic/key.py +++ b/attic/key.py @@ -19,7 +19,11 @@ except ImportError: from attic.crypto import pbkdf2_sha256, get_random_bytes, AES, bytes_to_long, long_to_bytes, bytes_to_int, num_aes_blocks from attic.helpers import 
IntegrityError, get_keys_dir, Error +# we do not store the full IV on disk, as the upper 8 bytes are expected to be +# zero anyway as the full IV is a 128bit counter. PREFIX are the upper 8 bytes, +# stored_iv are the lower 8 Bytes. PREFIX = b'\0' * 8 +Meta = namedtuple('Meta', 'compr_type, crypt_type, mac_type, hmac, stored_iv') class UnsupportedPayloadError(Error): @@ -144,9 +148,6 @@ class LzmaCompressor(object): # uses 10..19 in the mapping COMPR_DEFAULT = ZlibCompressor.TYPE + 6 # zlib level 6 -Meta = namedtuple('Meta', 'compr_type, crypt_type, mac_type, hmac, iv, stored_iv') - - class KeyBase(object): TYPE = 0x00 # override in derived classes @@ -187,7 +188,7 @@ class PlaintextKey(KeyBase): def encrypt(self, data): meta = Meta(compr_type=self.compressor.TYPE, crypt_type=self.TYPE, mac_type=self.maccer.TYPE, - hmac=None, iv=None, stored_iv=None) + hmac=None, stored_iv=None) data = self.compressor.compress(data) return generate(meta, data) @@ -222,11 +223,10 @@ class AESKeyBase(KeyBase): data = self.compressor.compress(data) self.enc_cipher.reset() stored_iv = self.enc_cipher.iv[8:] - iv = PREFIX + stored_iv data = self.enc_cipher.encrypt(data) hmac = self.maccer(self.enc_hmac_key, stored_iv + data).digest() meta = Meta(compr_type=self.compressor.TYPE, crypt_type=self.TYPE, mac_type=self.maccer.TYPE, - hmac=hmac, iv=iv, stored_iv=stored_iv) + hmac=hmac, stored_iv=stored_iv) return generate(meta, data) def decrypt(self, id, data): @@ -236,7 +236,7 @@ class AESKeyBase(KeyBase): computed_hmac = self.maccer(self.enc_hmac_key, meta.stored_iv + data).digest() if computed_hmac != meta.hmac: raise IntegrityError('Encryption envelope checksum mismatch') - self.dec_cipher.reset(iv=meta.iv) + self.dec_cipher.reset(iv=PREFIX + meta.stored_iv) data = self.compressor.decompress(self.dec_cipher.decrypt(data)) if id and self.id_hash(data) != id: raise IntegrityError('Chunk id verification failed') @@ -486,10 +486,9 @@ def legacy_parser(all_data, crypt_type): # all rather 
hardcoded else: hmac = all_data[offset:offset+32] stored_iv = all_data[offset+32:offset+40] - iv = PREFIX + stored_iv data = all_data[offset+40:] meta = Meta(compr_type=6, crypt_type=crypt_type, mac_type=HMAC_SHA256.TYPE, - hmac=hmac, iv=iv, stored_iv=stored_iv) + hmac=hmac, stored_iv=stored_iv) compressor, crypter, maccer = get_implementations(meta) return meta, data, compressor, crypter, maccer From 7d94677993158b0e758762ece4f36a49faf87b7b Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 7 Mar 2015 22:18:45 +0100 Subject: [PATCH 18/58] move layout docstrings to where they belong and update them --- attic/key.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/attic/key.py b/attic/key.py index 247e2451..21ae4bbe 100644 --- a/attic/key.py +++ b/attic/key.py @@ -206,13 +206,6 @@ class AESKeyBase(KeyBase): """Common base class shared by KeyfileKey and PassphraseKey Chunks are encrypted using 256bit AES in Counter Mode (CTR) - - Payload layout: HEADER(4) + HMAC(32) + NONCE(8) + CIPHERTEXT - - To reduce payload size only 8 bytes of the 16 bytes nonce is saved - in the payload, the first 8 bytes are always zeros. This does not - affect security but limits the maximum repository capacity to - only 295 exabytes! """ def id_hash(self, data): """Return HMAC hash using the "id" HMAC key @@ -478,6 +471,17 @@ def get_implementations(meta): def legacy_parser(all_data, crypt_type): # all rather hardcoded + """ + Payload layout: + no encryption: TYPE(1) + data + with encryption: TYPE(1) + HMAC(32) + NONCE(8) + data + data is compressed with zlib level 6 and (in the 2nd case) encrypted. + + To reduce payload size only 8 bytes of the 16 bytes nonce is saved + in the payload, the first 8 bytes are always zeros. This does not + affect security but limits the maximum repository capacity to + only 295 exabytes! 
+ """ offset = 1 if crypt_type == PlaintextKey.TYPE: hmac = None @@ -503,6 +507,13 @@ def parser02(all_data): def parser03(all_data): # new & flexible + """ + Payload layout: + always: TYPE(1) + MSGPACK((meta, data)) + + meta is a Meta namedtuple and contains all required information about data. + data is maybe compressed (see meta) and maybe encrypted (see meta). + """ meta_tuple, data = msgpack.unpackb(all_data[1:]) meta = Meta(*meta_tuple) compressor, crypter, maccer = get_implementations(meta) From db39a5f33a24ecf1b68a8714de705984a3f6a94b Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 7 Mar 2015 23:08:41 +0100 Subject: [PATCH 19/58] add help for compression and mac methods --- attic/archiver.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/attic/archiver.py b/attic/archiver.py index f0d82662..c5425c70 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -463,7 +463,20 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") init_epilog = textwrap.dedent(""" This command initializes an empty repository. A repository is a filesystem directory containing the deduplicated data from zero or more archives. - Encryption can be enabled at repository init time. + Encryption can be enabled, compression and mac method can be chosen at + repository init time. + + --compression METHODs (default: zlib level 6): + + - 00..09 zlib levels 0..9 (0 means no compression, 9 max. compression) + - 10..19 lzma levels 0..9 (0 means no compression, 9 max. 
compression) + + --mac METHODs (default: sha256 or hmac-sha256): + + - 0 sha256 (just simple hash, no MAC, faster on 32bit CPU) + - 1 sha512-256 (just simple hash, no MAC, faster on 64bit CPU) + - 2 hmac-sha256 (HMAC, faster on 32bit CPU) + - 3 hmac-sha512-256 (HMAC, faster on 64bit CPU) """) subparser = subparsers.add_parser('init', parents=[common_parser], description=self.do_init.__doc__, epilog=init_epilog, From b53a620602020bc47758b3ab247095999a528617 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 7 Mar 2015 23:18:10 +0100 Subject: [PATCH 20/58] renumber hash/mac numbers, automate defaults in help note: maybe we add the full (not truncated) sha512 hash and hmac-sha512, keep the numbers free. --- attic/archiver.py | 16 ++++++++-------- attic/key.py | 8 ++++---- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/attic/archiver.py b/attic/archiver.py index c5425c70..46831a4c 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -13,7 +13,7 @@ from attic import __version__ from attic.archive import Archive, ArchiveChecker from attic.repository import Repository from attic.cache import Cache -from attic.key import key_creator, COMPR_DEFAULT +from attic.key import key_creator, COMPR_DEFAULT, HASH_DEFAULT, MAC_DEFAULT from attic.helpers import Error, location_validator, format_time, \ format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, \ get_cache_dir, get_keys_dir, format_timedelta, prune_within, prune_split, \ @@ -466,18 +466,18 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") Encryption can be enabled, compression and mac method can be chosen at repository init time. - --compression METHODs (default: zlib level 6): + --compression METHODs (default: %02d): - 00..09 zlib levels 0..9 (0 means no compression, 9 max. compression) - 10..19 lzma levels 0..9 (0 means no compression, 9 max. 
compression) - --mac METHODs (default: sha256 or hmac-sha256): + --mac METHODs (default: %02d or %02d): - - 0 sha256 (just simple hash, no MAC, faster on 32bit CPU) - - 1 sha512-256 (just simple hash, no MAC, faster on 64bit CPU) - - 2 hmac-sha256 (HMAC, faster on 32bit CPU) - - 3 hmac-sha512-256 (HMAC, faster on 64bit CPU) - """) + - 00 sha256 (just simple hash, no MAC, faster on 32bit CPU) + - 01 sha512-256 (just simple hash, no MAC, faster on 64bit CPU) + - 10 hmac-sha256 (HMAC, faster on 32bit CPU) + - 11 hmac-sha512-256 (HMAC, faster on 64bit CPU) + """ % (COMPR_DEFAULT, HASH_DEFAULT, MAC_DEFAULT)) subparser = subparsers.add_parser('init', parents=[common_parser], description=self.do_init.__doc__, epilog=init_epilog, formatter_class=argparse.RawDescriptionHelpFormatter) diff --git a/attic/key.py b/attic/key.py index 21ae4bbe..8a8c476a 100644 --- a/attic/key.py +++ b/attic/key.py @@ -64,7 +64,7 @@ class HMAC(hmac.HMAC): class SHA256(object): # note: can't subclass sha256 - TYPE = 0x00 + TYPE = 0 def __init__(self, key, data=b''): # signature is like for a MAC, we ignore the key as this is a simple hash @@ -84,7 +84,7 @@ class SHA256(object): # note: can't subclass sha256 class SHA512_256(sha512_256): """sha512, but digest truncated to 256bit - faster than sha256 on 64bit platforms""" - TYPE = 0x01 + TYPE = 1 def __init__(self, key, data): # signature is like for a MAC, we ignore the key as this is a simple hash @@ -97,7 +97,7 @@ HASH_DEFAULT = SHA256.TYPE class HMAC_SHA256(HMAC): - TYPE = 0x02 + TYPE = 10 def __init__(self, key, data): if key is None: @@ -106,7 +106,7 @@ class HMAC_SHA256(HMAC): class HMAC_SHA512_256(HMAC): - TYPE = 0x03 + TYPE = 11 def __init__(self, key, data): if key is None: From 8032089d6275a934469603fa9faa913a7f1b40ed Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 12 Mar 2015 12:20:26 +0100 Subject: [PATCH 21/58] add remark about potential memory issue with tampered input --- attic/key.py | 5 +++++ 1 file changed, 5 
insertions(+) diff --git a/attic/key.py b/attic/key.py index 8a8c476a..e5815b99 100644 --- a/attic/key.py +++ b/attic/key.py @@ -514,6 +514,11 @@ def parser03(all_data): # new & flexible meta is a Meta namedtuple and contains all required information about data. data is maybe compressed (see meta) and maybe encrypted (see meta). """ + # TODO use Unpacker(..., max_*_len=NOTMORETHANNEEDED) to avoid any memory + # allocation issues on untrusted and potentially tampered input data. + # Problem: we currently must use older msgpack because pure python impl. + # is broken in 0.4.2 < version <= 0.4.5, but this api is only offered by + # more recent ones, not by 0.4.2. So, fix here when 0.4.6 is out. :-( meta_tuple, data = msgpack.unpackb(all_data[1:]) meta = Meta(*meta_tuple) compressor, crypter, maccer = get_implementations(meta) From 4cdb1c37babfa4888c5dda9ba177bf4f5ff80b49 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 12 Mar 2015 19:03:27 +0100 Subject: [PATCH 22/58] use GMAC as default for now, integrate into maccer type system, adapt tests --- attic/archiver.py | 1 + attic/key.py | 8 +++++--- attic/testsuite/key.py | 20 ++++++++++---------- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/attic/archiver.py b/attic/archiver.py index 001d9b43..16f7072d 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -494,6 +494,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") - 01 sha512-256 (just simple hash, no MAC, faster on 64bit CPU) - 10 hmac-sha256 (HMAC, faster on 32bit CPU) - 11 hmac-sha512-256 (HMAC, faster on 64bit CPU) + - 20 gmac (MAC, fastest on CPUs with AES-GCM HW support) """ % (COMPR_DEFAULT, HASH_DEFAULT, MAC_DEFAULT)) subparser = subparsers.add_parser('init', parents=[common_parser], description=self.do_init.__doc__, epilog=init_epilog, diff --git a/attic/key.py b/attic/key.py index a2e2ef61..8cb92b00 100644 --- a/attic/key.py +++ b/attic/key.py @@ -115,6 +115,8 @@ class HMAC_SHA512_256(HMAC): class 
GMAC: + TYPE = 20 + def __init__(self, key, data): if key is None: raise Exception("do not use GMAC if you don't have a key") @@ -129,7 +131,7 @@ class GMAC: return tag -MAC_DEFAULT = HMAC_SHA256.TYPE +MAC_DEFAULT = GMAC.TYPE class ZlibCompressor(object): # uses 0..9 in the mapping @@ -238,8 +240,7 @@ class AESKeyBase(KeyBase): only 295 exabytes! """ def id_hash(self, data): - return GMAC(self.id_key, data).digest() - #return self.maccer(self.id_key, data).digest() + return self.maccer(self.id_key, data).digest() def encrypt(self, data): data = self.compressor.compress(data) @@ -500,6 +501,7 @@ maccer_mapping = { # MACs: HMAC_SHA256.TYPE: HMAC_SHA256, HMAC_SHA512_256.TYPE: HMAC_SHA512_256, + GMAC.TYPE: GMAC, } diff --git a/attic/testsuite/key.py b/attic/testsuite/key.py index a169e0bd..aec8ff85 100644 --- a/attic/testsuite/key.py +++ b/attic/testsuite/key.py @@ -18,19 +18,19 @@ class KeyTestCase(AtticTestCase): keyfile2_key_file = """ ATTIC KEY 0000000000000000000000000000000000000000000000000000000000000000 -hqppdGVyYXRpb25zzgABhqCkc2FsdNoAICiRWfijWqIuvr+70VzOsUS4Y6NM45FWm6LgCu -2GyalGqWFsZ29yaXRobaRnbWFjpGhhc2jaACDgCK7u30Pi+Du1qHRyWBupAAAAAAAAAAAA -AAAAAAAAAKd2ZXJzaW9uAaRkYXRh2gDQrlCtq2mzdmkuhwIoko5+amxYqnlfNHHZxRFiX9 -F8AliP7H6S0j9uHyrBKRDWtj7VGYWVW8COy/FncLRgRhspB59rH3y/GS6pfeEw7RWUPd32 -eOcB6v8q+IHUvGttyFRcN6PxSFHBhOKN0jqStP0UqXLv+d9rGWi6X/HNZGu9WPkqs/g0G9 -xnf48i9pOy19aQo3HV//ubf+VYWmc1J8zjCS2Og0JkMtxbqM6j4mShPjkURZZBXSJGtORV -5IzNAzixJWmr8LR12TmFGVb0U9P79A==""".strip() +hqlhbGdvcml0aG2kZ21hY6d2ZXJzaW9uAaRkYXRh2gDQByfRqTSTSlAic/cXPGs0IsqVb+ +Zi/U16d6T+dUBtRHFjaFCJqtY+CPWiv2BD35cZop4TImLdGYcGvOAVOzdGKL7n8dTVnI0G +jnapbvt8NBYRhXV9G3hFMTLjncJoHLQwHSXkVoG/UjBWHf9pcyhfSdWAyePkWrfk0K+O97 +/MGvYdUDeMju89c7SZKOD4PVZ+gG9ILpmI0SvCciptAX2ZrNqeJ3AkqoVzThT7VsNwbpHF +j7MgZ5hWAqLA+PkEZ39jnchWWm2dxJMkjUmeAGjoiappdGVyYXRpb25zzgABhqCkc2FsdN +oAIHxtfNeGPOnhza/lXT492RZEVFmm2hewR0MwDhI6DQAopGhhc2jaACAtXRPR5mDd71wp +xNWoIlr9AAAAAAAAAAAAAAAAAAAAAA==""".strip() keyfile2_cdata = 
unhexlify(re.sub('\W', '', """ - 004078370be366ac3ad9d147992be8ebee000000000000000000000000000000000000000000000000 - b94bfb5d0a63b0c47cf74e2d0585aa + 039295060014da0020c772252fb7a88e06be0e1e371168fb5b00000000000000000000000000000000 + a80000000000000000af86399e604aa35fed0fae7bc02b39ae """)) - keyfile2_id = unhexlify('45f309b4ef353c467d16a19039b87e5400000000000000000000000000000000') + keyfile2_id = unhexlify('b1927b00c683abe7b40ec4cc3d8c8f2f00000000000000000000000000000000') def setUp(self): self.tmppath = tempfile.mkdtemp() From f0428457626e4cb75509c938d51484f4838ce76b Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 13 Mar 2015 22:30:22 +0100 Subject: [PATCH 23/58] make the cipher variable, uses AEAD-like interface for authentication/encryption --- attic/archiver.py | 14 ++- attic/key.py | 214 ++++++++++++++++++++++-------------- attic/testsuite/archive.py | 1 + attic/testsuite/archiver.py | 2 +- attic/testsuite/key.py | 21 ++-- 5 files changed, 156 insertions(+), 96 deletions(-) diff --git a/attic/archiver.py b/attic/archiver.py index 16f7072d..1a84ce52 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -13,7 +13,7 @@ from attic import __version__ from attic.archive import Archive, ArchiveChecker from attic.repository import Repository from attic.cache import Cache -from attic.key import key_creator, COMPR_DEFAULT, HASH_DEFAULT, MAC_DEFAULT +from attic.key import key_creator, COMPR_DEFAULT, HASH_DEFAULT, MAC_DEFAULT, PLAIN_DEFAULT, CIPHER_DEFAULT from attic.helpers import Error, location_validator, format_time, \ format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, \ get_cache_dir, get_keys_dir, format_timedelta, prune_within, prune_split, \ @@ -59,7 +59,6 @@ class Archiver: repository = self.open_repository(args.repository, create=True, exclusive=True) key = key_creator(repository, args) manifest = Manifest(key, repository) - manifest.key = key manifest.write() repository.commit() return self.exit_code @@ -488,6 
+487,12 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") - 00..09 zlib levels 0..9 (0 means no compression, 9 max. compression) - 10..19 lzma levels 0..9 (0 means no compression, 9 max. compression) + --cipher METHODs (default: %02d or %02d) + + - 00 No encryption + - 01 AEAD: AES-CTR + HMAC-SHA256 + - 02 AEAD: AES-GCM + --mac METHODs (default: %02d or %02d): - 00 sha256 (just simple hash, no MAC, faster on 32bit CPU) @@ -495,7 +500,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") - 10 hmac-sha256 (HMAC, faster on 32bit CPU) - 11 hmac-sha512-256 (HMAC, faster on 64bit CPU) - 20 gmac (MAC, fastest on CPUs with AES-GCM HW support) - """ % (COMPR_DEFAULT, HASH_DEFAULT, MAC_DEFAULT)) + """ % (COMPR_DEFAULT, PLAIN_DEFAULT, CIPHER_DEFAULT, HASH_DEFAULT, MAC_DEFAULT)) subparser = subparsers.add_parser('init', parents=[common_parser], description=self.do_init.__doc__, epilog=init_epilog, formatter_class=argparse.RawDescriptionHelpFormatter) @@ -506,6 +511,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") subparser.add_argument('-e', '--encryption', dest='encryption', choices=('none', 'passphrase', 'keyfile'), default='none', help='select encryption method') + subparser.add_argument('-C', '--cipher', dest='cipher', + type=int, default=None, metavar='METHOD', + help='select cipher (0..2)') subparser.add_argument('-c', '--compression', dest='compression', type=int, default=COMPR_DEFAULT, metavar='METHOD', help='select compression method (0..19)') diff --git a/attic/key.py b/attic/key.py index 8cb92b00..f7eca481 100644 --- a/attic/key.py +++ b/attic/key.py @@ -23,7 +23,7 @@ from attic.helpers import IntegrityError, get_keys_dir, Error # zero anyway as the full IV is a 128bit counter. PREFIX are the upper 8 bytes, # stored_iv are the lower 8 Bytes. 
PREFIX = b'\0' * 8 -Meta = namedtuple('Meta', 'compr_type, crypt_type, mac_type, hmac, stored_iv') +Meta = namedtuple('Meta', 'compr_type, crypt_type, mac_type, cipher_type, hmac, stored_iv') class UnsupportedPayloadError(Error): @@ -124,7 +124,7 @@ class GMAC: self.data = data def digest(self): - mac_cipher = AES(is_encrypt=True, key=self.key, iv=b'\0'*16) # XXX do we need an IV here? + mac_cipher = AES(is_encrypt=True, key=self.key, iv=b'\0' * 16) # GMAC = aes-gcm with all data as AAD, no data as to-be-encrypted data mac_cipher.add(bytes(self.data)) tag, _ = mac_cipher.compute_tag_and_encrypt(b'') @@ -165,12 +165,67 @@ class LzmaCompressor(object): # uses 10..19 in the mapping COMPR_DEFAULT = ZlibCompressor.TYPE + 6 # zlib level 6 +class PLAIN: + TYPE = 0 + + def __init__(self, **kw): + pass + + def compute_tag_and_encrypt(self, data): + return b'', b'', data + + def check_tag_and_decrypt(self, tag, iv_last8, data): + return data + + +class AES_CTR_HMAC: + TYPE = 1 + # TODO + + +class AES_GCM: + TYPE = 2 + + def __init__(self, enc_key=b'\0' * 32, enc_iv=b'\0' * 16, **kw): + # note: hmac_key is not used for aes-gcm, it does aes+gmac in 1 pass + self.enc_iv = enc_iv + self.enc_cipher = AES(is_encrypt=True, key=enc_key, iv=enc_iv) + self.dec_cipher = AES(is_encrypt=False, key=enc_key) + + def compute_tag_and_encrypt(self, data): + self.enc_cipher.reset(iv=self.enc_iv) + iv_last8 = self.enc_iv[8:] + self.enc_cipher.add(iv_last8) + tag, data = self.enc_cipher.compute_tag_and_encrypt(data) + # increase the IV (counter) value so same value is never used twice + current_iv = bytes_to_long(iv_last8) + self.enc_iv = PREFIX + long_to_bytes(current_iv + num_aes_blocks(len(data))) + return tag, iv_last8, data + + def check_tag_and_decrypt(self, tag, iv_last8, data): + iv = PREFIX + iv_last8 + self.dec_cipher.reset(iv=iv) + self.dec_cipher.add(iv_last8) + try: + data = self.dec_cipher.check_tag_and_decrypt(tag, data) + except Exception: + raise IntegrityError('Encryption 
envelope checksum mismatch') + return data + + +PLAIN_DEFAULT = PLAIN.TYPE +CIPHER_DEFAULT = AES_GCM.TYPE + + class KeyBase(object): TYPE = 0x00 # override in derived classes - def __init__(self, compressor, maccer): - self.compressor = compressor() - self.maccer = maccer + def __init__(self, compressor_cls, maccer_cls, cipher_cls): + self.compressor = compressor_cls() + self.maccer_cls = maccer_cls # hasher/maccer used by id_hash + self.cipher_cls = cipher_cls # plaintext dummy or AEAD cipher + self.cipher = cipher_cls() + self.id_key = None def id_hash(self, data): """Return a HASH (no id_key) or a MAC (using the "id_key" key) @@ -183,12 +238,26 @@ class KeyBase(object): exists in the repo for other data created with another id_hash method. somehow unlikely considering 128 or 256bits, but still. """ + return self.maccer_cls(self.id_key, data).digest() def encrypt(self, data): - pass + data = self.compressor.compress(data) + tag, iv_last8, data = self.cipher.compute_tag_and_encrypt(data) + meta = Meta(compr_type=self.compressor.TYPE, crypt_type=self.TYPE, + mac_type=self.maccer_cls.TYPE, cipher_type=self.cipher.TYPE, + hmac=tag, stored_iv=iv_last8) + return generate(meta, data) def decrypt(self, id, data): - pass + meta, data, compressor, crypter, maccer, cipher = parser(data) + assert isinstance(self, crypter) + assert self.maccer_cls is maccer + assert self.cipher_cls is cipher + data = self.cipher.check_tag_and_decrypt(meta.hmac, meta.stored_iv, data) + data = self.compressor.decompress(data) + if id and self.id_hash(data) != id: + raise IntegrityError('Chunk id verification failed') + return data class PlaintextKey(KeyBase): @@ -201,30 +270,13 @@ class PlaintextKey(KeyBase): print('Encryption NOT enabled.\nUse the "--encryption=passphrase|keyfile" to enable encryption.') compressor = compressor_creator(args) maccer = maccer_creator(args, cls) - return cls(compressor, maccer) + cipher = cipher_creator(args, cls) + return cls(compressor, maccer, cipher) 
@classmethod def detect(cls, repository, manifest_data): - meta, data, compressor, crypter, maccer = parser(manifest_data) - return cls(compressor, maccer) - - def id_hash(self, data): - return self.maccer(None, data).digest() - - def encrypt(self, data): - meta = Meta(compr_type=self.compressor.TYPE, crypt_type=self.TYPE, mac_type=self.maccer.TYPE, - hmac=None, stored_iv=None) - data = self.compressor.compress(data) - return generate(meta, data) - - def decrypt(self, id, data): - meta, data, compressor, crypter, maccer = parser(data) - assert isinstance(self, crypter) - assert self.maccer is maccer - data = self.compressor.decompress(data) - if id and self.id_hash(data) != id: - raise IntegrityError('Chunk id verification failed') - return data + meta, data, compressor, crypter, maccer, cipher = parser(manifest_data) + return cls(compressor, maccer, cipher) class AESKeyBase(KeyBase): @@ -239,59 +291,28 @@ class AESKeyBase(KeyBase): affect security but limits the maximum repository capacity to only 295 exabytes! 
""" - def id_hash(self, data): - return self.maccer(self.id_key, data).digest() - - def encrypt(self, data): - data = self.compressor.compress(data) - self.enc_cipher.reset(iv=self.enc_iv) - iv_last8 = self.enc_iv[8:] - self.enc_cipher.add(iv_last8) - tag, data = self.enc_cipher.compute_tag_and_encrypt(data) - # increase the IV (counter) value so same value is never used twice - current_iv = bytes_to_long(iv_last8) - self.enc_iv = PREFIX + long_to_bytes(current_iv + num_aes_blocks(len(data))) - meta = Meta(compr_type=self.compressor.TYPE, crypt_type=self.TYPE, mac_type=self.maccer.TYPE, - hmac=tag, stored_iv=iv_last8) - return generate(meta, data) - - def decrypt(self, id, data): - meta, data, compressor, crypter, maccer = parser(data) - assert isinstance(self, crypter) - assert self.maccer is maccer - iv_last8 = meta.stored_iv - iv = PREFIX + iv_last8 - self.dec_cipher.reset(iv=iv) - self.dec_cipher.add(iv_last8) - tag = meta.hmac # TODO rename Meta element name to be generic - try: - data = self.dec_cipher.check_tag_and_decrypt(tag, data) - except Exception: - raise IntegrityError('Encryption envelope checksum mismatch') - data = self.compressor.decompress(data) - if id and self.id_hash(data) != id: - raise IntegrityError('Chunk id verification failed') - return data - def extract_nonce(self, payload): - meta, data, compressor, crypter, maccer = parser(payload) + meta, data, compressor, crypter, maccer, cipher = parser(payload) assert isinstance(self, crypter) nonce = bytes_to_long(meta.stored_iv) return nonce def init_from_random_data(self, data): self.enc_key = data[0:32] - self.enc_hmac_key = data[32:64] # XXX enc_hmac_key not used for AES-GCM + self.enc_hmac_key = data[32:64] self.id_key = data[64:96] self.chunk_seed = bytes_to_int(data[96:100]) # Convert to signed int32 if self.chunk_seed & 0x80000000: self.chunk_seed = self.chunk_seed - 0xffffffff - 1 - def init_ciphers(self, enc_iv=PREFIX * 2): # default IV = 16B zero - self.enc_iv = enc_iv - 
self.enc_cipher = AES(is_encrypt=True, key=self.enc_key, iv=enc_iv) - self.dec_cipher = AES(is_encrypt=False, key=self.enc_key) + def init_ciphers(self, enc_iv=b'\0' * 16): + self.cipher = self.cipher_cls(enc_key=self.enc_key, enc_iv=enc_iv, + enc_hmac_key=self.enc_hmac_key) + + @property + def enc_iv(self): + return self.cipher.enc_iv class PassphraseKey(AESKeyBase): @@ -302,7 +323,8 @@ class PassphraseKey(AESKeyBase): def create(cls, repository, args): compressor = compressor_creator(args) maccer = maccer_creator(args, cls) - key = cls(compressor, maccer) + cipher = cipher_creator(args, cls) + key = cls(compressor, maccer, cipher) passphrase = os.environ.get('ATTIC_PASSPHRASE') if passphrase is not None: passphrase2 = passphrase @@ -324,8 +346,8 @@ class PassphraseKey(AESKeyBase): @classmethod def detect(cls, repository, manifest_data): prompt = 'Enter passphrase for %s: ' % repository._location.orig - meta, data, compressor, crypter, maccer = parser(manifest_data) - key = cls(compressor, maccer) + meta, data, compressor, crypter, maccer, cipher = parser(manifest_data) + key = cls(compressor, maccer, cipher) passphrase = os.environ.get('ATTIC_PASSPHRASE') if passphrase is None: passphrase = getpass(prompt) @@ -356,8 +378,8 @@ class KeyfileKey(AESKeyBase): @classmethod def detect(cls, repository, manifest_data): - meta, data, compressor, crypter, maccer = parser(manifest_data) - key = cls(compressor, maccer) + meta, data, compressor, crypter, maccer, cipher = parser(manifest_data) + key = cls(compressor, maccer, cipher) path = cls.find_key_file(repository) prompt = 'Enter passphrase for key file %s: ' % path passphrase = os.environ.get('ATTIC_PASSPHRASE', '') @@ -467,7 +489,8 @@ class KeyfileKey(AESKeyBase): print('Passphrases do not match') compressor = compressor_creator(args) maccer = maccer_creator(args, cls) - key = cls(compressor, maccer) + cipher = cipher_creator(args, cls) + key = cls(compressor, maccer, cipher) key.repository_id = repository.id 
key.init_from_random_data(get_random_bytes(100)) key.init_ciphers() @@ -505,15 +528,25 @@ maccer_mapping = { } +cipher_mapping = { + # no cipher (but cipher-like class __init__ method signature): + PLAIN.TYPE: PLAIN, + # AEAD cipher implementations + AES_CTR_HMAC.TYPE: AES_CTR_HMAC, + AES_GCM.TYPE: AES_GCM, +} + + def get_implementations(meta): try: compressor = compressor_mapping[meta.compr_type] crypter = crypter_mapping[meta.crypt_type] maccer = maccer_mapping[meta.mac_type] + cipher = cipher_mapping[meta.cipher_type] except KeyError: raise UnsupportedPayloadError("compr_type %x crypt_type %x mac_type %x" % ( - meta.compr_type, meta.crypt_type, meta.mac_type)) - return compressor, crypter, maccer + meta.compr_type, meta.crypt_type, meta.mac_type, meta.cipher_type)) + return compressor, crypter, maccer, cipher def legacy_parser(all_data, crypt_type): # all rather hardcoded @@ -537,10 +570,11 @@ def legacy_parser(all_data, crypt_type): # all rather hardcoded hmac = all_data[offset:offset+32] stored_iv = all_data[offset+32:offset+40] data = all_data[offset+40:] - meta = Meta(compr_type=6, crypt_type=crypt_type, mac_type=HMAC_SHA256.TYPE, + meta = Meta(compr_type=6, crypt_type=crypt_type, + mac_type=HMAC_SHA256.TYPE, cipher_type=AES_CTR_HMAC.TYPE, hmac=hmac, stored_iv=stored_iv) - compressor, crypter, maccer = get_implementations(meta) - return meta, data, compressor, crypter, maccer + compressor, crypter, maccer, cipher = get_implementations(meta) + return meta, data, compressor, crypter, maccer, cipher def parser00(all_data): return legacy_parser(all_data, KeyfileKey.TYPE) @@ -567,8 +601,8 @@ def parser03(all_data): # new & flexible # more recent ones, not by 0.4.2. So, fix here when 0.4.6 is out. 
:-( meta_tuple, data = msgpack.unpackb(all_data[1:]) meta = Meta(*meta_tuple) - compressor, crypter, maccer = get_implementations(meta) - return meta, data, compressor, crypter, maccer + compressor, crypter, maccer, cipher = get_implementations(meta) + return meta, data, compressor, crypter, maccer, cipher def parser(data): @@ -584,7 +618,7 @@ def parser(data): def key_factory(repository, manifest_data): - meta, data, compressor, crypter, maccer = parser(manifest_data) + meta, data, compressor, crypter, maccer, cipher = parser(manifest_data) return crypter.detect(repository, manifest_data) @@ -626,3 +660,19 @@ def maccer_creator(args, key_cls): if maccer is None: raise NotImplementedError("no mac %d" % args.mac) return maccer + + +def cipher_creator(args, key_cls): + # args == None is used by unit tests + cipher = None if args is None else args.cipher + if cipher is None: + if key_cls is PlaintextKey: + cipher = PLAIN_DEFAULT + elif key_cls in (KeyfileKey, PassphraseKey): + cipher = CIPHER_DEFAULT + else: + raise NotImplementedError("unknown key class") + cipher = cipher_mapping.get(cipher) + if cipher is None: + raise NotImplementedError("no cipher %d" % args.cipher) + return cipher diff --git a/attic/testsuite/archive.py b/attic/testsuite/archive.py index 5d478dc0..b531ab87 100644 --- a/attic/testsuite/archive.py +++ b/attic/testsuite/archive.py @@ -20,6 +20,7 @@ class ChunkBufferTestCase(AtticTestCase): repository = None compression = COMPR_DEFAULT mac = None + cipher = None def test(self): data = [{b'foo': 1}, {b'bar': 2}] diff --git a/attic/testsuite/archiver.py b/attic/testsuite/archiver.py index 75773402..4d2c1026 100644 --- a/attic/testsuite/archiver.py +++ b/attic/testsuite/archiver.py @@ -379,7 +379,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): hash = sha256(data).digest() if not hash in seen: seen.add(hash) - meta, data, _, _, _ = parser(data) + meta, data, _, _, _, _ = parser(data) num_blocks = num_aes_blocks(len(data)) nonce = 
bytes_to_long(meta.stored_iv) for counter in range(nonce, nonce + num_blocks): diff --git a/attic/testsuite/key.py b/attic/testsuite/key.py index aec8ff85..58b1e27d 100644 --- a/attic/testsuite/key.py +++ b/attic/testsuite/key.py @@ -15,22 +15,23 @@ class KeyTestCase(AtticTestCase): repository = Location(tempfile.mkstemp()[1]) compression = COMPR_DEFAULT mac = None + cipher = None keyfile2_key_file = """ ATTIC KEY 0000000000000000000000000000000000000000000000000000000000000000 -hqlhbGdvcml0aG2kZ21hY6d2ZXJzaW9uAaRkYXRh2gDQByfRqTSTSlAic/cXPGs0IsqVb+ -Zi/U16d6T+dUBtRHFjaFCJqtY+CPWiv2BD35cZop4TImLdGYcGvOAVOzdGKL7n8dTVnI0G -jnapbvt8NBYRhXV9G3hFMTLjncJoHLQwHSXkVoG/UjBWHf9pcyhfSdWAyePkWrfk0K+O97 -/MGvYdUDeMju89c7SZKOD4PVZ+gG9ILpmI0SvCciptAX2ZrNqeJ3AkqoVzThT7VsNwbpHF -j7MgZ5hWAqLA+PkEZ39jnchWWm2dxJMkjUmeAGjoiappdGVyYXRpb25zzgABhqCkc2FsdN -oAIHxtfNeGPOnhza/lXT492RZEVFmm2hewR0MwDhI6DQAopGhhc2jaACAtXRPR5mDd71wp -xNWoIlr9AAAAAAAAAAAAAAAAAAAAAA==""".strip() +hqppdGVyYXRpb25zzgABhqCpYWxnb3JpdGhtpGdtYWOkaGFzaNoAII1CqUnJzgKISX3lwR ++wWqMAAAAAAAAAAAAAAAAAAAAApGRhdGHaANBGe/oYLxHbAq72vjwEpgNMV73dTMkZkYh4 +0WtFC65DwZmqvwbwBBaq1g+fiym+khRtrn9hZvF6rpjk0RrAURSxCXIt/XUNQzQlcQjYbb +kTT0aFk3DkKbwA/pgx10s/nWBmz9xv4yT5uoewOdPV009nJnrLdIz1zJTPvy2ylejHF3Na +Sy/B/tWA9PIeRZzrDe/lVY6YBs8lKz1jtT/3vCJFCa+LOSSJHV+tExnpgO0NBTxDmTckRe +vk3IRPVUml5VXHoUYEUEj6QpBA2F4NKdSzpHNhbHTaACDh3gxO3vgi+K/KMmBebec6RhBy +QQWJNlInT3+yKnQpdqd2ZXJzaW9uAQ==""".strip() keyfile2_cdata = unhexlify(re.sub('\W', '', """ - 039295060014da0020c772252fb7a88e06be0e1e371168fb5b00000000000000000000000000000000 - a80000000000000000af86399e604aa35fed0fae7bc02b39ae + 03929606001402da002046c635e7ce41b65c5c075fa6afb97f5100000000000000000000000000000000 + a80000000000000000affb14944408753093ba2860edb49220 """)) - keyfile2_id = unhexlify('b1927b00c683abe7b40ec4cc3d8c8f2f00000000000000000000000000000000') + keyfile2_id = unhexlify('94899966ce3eaad825f37500c8c87ef100000000000000000000000000000000') def setUp(self): self.tmppath = 
tempfile.mkdtemp() From c1c94a8682fa9600cd8dd3cd8191f7a9d7fe3345 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 13 Mar 2015 23:13:41 +0100 Subject: [PATCH 24/58] cosmetics: comments, rename crypt... -> key... --- attic/key.py | 62 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 26 deletions(-) diff --git a/attic/key.py b/attic/key.py index f7eca481..a810068f 100644 --- a/attic/key.py +++ b/attic/key.py @@ -23,7 +23,7 @@ from attic.helpers import IntegrityError, get_keys_dir, Error # zero anyway as the full IV is a 128bit counter. PREFIX are the upper 8 bytes, # stored_iv are the lower 8 Bytes. PREFIX = b'\0' * 8 -Meta = namedtuple('Meta', 'compr_type, crypt_type, mac_type, cipher_type, hmac, stored_iv') +Meta = namedtuple('Meta', 'compr_type, key_type, mac_type, cipher_type, hmac, stored_iv') class UnsupportedPayloadError(Error): @@ -63,6 +63,9 @@ class HMAC(hmac.HMAC): self.inner.update(msg) +# HASH / MAC stuff below all has a mac-like interface, so it can be used in the same way. +# special case: hashes do not use keys (and thus, do not sign/authenticate) + class SHA256(object): # note: can't subclass sha256 TYPE = 0 @@ -93,9 +96,6 @@ class SHA512_256(sha512_256): super().__init__(data) -HASH_DEFAULT = SHA256.TYPE - - class HMAC_SHA256(HMAC): TYPE = 10 @@ -131,9 +131,13 @@ class GMAC: return tag +HASH_DEFAULT = SHA256.TYPE MAC_DEFAULT = GMAC.TYPE +# compressor classes, all same interface +# special case: zlib level 0 is "no compression" + class ZlibCompressor(object): # uses 0..9 in the mapping TYPE = 0 LEVELS = range(10) @@ -165,6 +169,9 @@ class LzmaCompressor(object): # uses 10..19 in the mapping COMPR_DEFAULT = ZlibCompressor.TYPE + 6 # zlib level 6 +# ciphers - AEAD (authenticated encryption with assoc. data) style interface +# special case: PLAIN dummy does not encrypt / authenticate + class PLAIN: TYPE = 0 @@ -217,6 +224,9 @@ PLAIN_DEFAULT = PLAIN.TYPE CIPHER_DEFAULT = AES_GCM.TYPE +# misc. 
types of keys +# special case: no keys (thus: no encryption, no signing/authentication) + class KeyBase(object): TYPE = 0x00 # override in derived classes @@ -243,14 +253,14 @@ class KeyBase(object): def encrypt(self, data): data = self.compressor.compress(data) tag, iv_last8, data = self.cipher.compute_tag_and_encrypt(data) - meta = Meta(compr_type=self.compressor.TYPE, crypt_type=self.TYPE, + meta = Meta(compr_type=self.compressor.TYPE, key_type=self.TYPE, mac_type=self.maccer_cls.TYPE, cipher_type=self.cipher.TYPE, hmac=tag, stored_iv=iv_last8) return generate(meta, data) def decrypt(self, id, data): - meta, data, compressor, crypter, maccer, cipher = parser(data) - assert isinstance(self, crypter) + meta, data, compressor, keyer, maccer, cipher = parser(data) + assert isinstance(self, keyer) assert self.maccer_cls is maccer assert self.cipher_cls is cipher data = self.cipher.check_tag_and_decrypt(meta.hmac, meta.stored_iv, data) @@ -275,7 +285,7 @@ class PlaintextKey(KeyBase): @classmethod def detect(cls, repository, manifest_data): - meta, data, compressor, crypter, maccer, cipher = parser(manifest_data) + meta, data, compressor, keyer, maccer, cipher = parser(manifest_data) return cls(compressor, maccer, cipher) @@ -292,8 +302,8 @@ class AESKeyBase(KeyBase): only 295 exabytes! 
""" def extract_nonce(self, payload): - meta, data, compressor, crypter, maccer, cipher = parser(payload) - assert isinstance(self, crypter) + meta, data, compressor, keyer, maccer, cipher = parser(payload) + assert isinstance(self, keyer) nonce = bytes_to_long(meta.stored_iv) return nonce @@ -346,7 +356,7 @@ class PassphraseKey(AESKeyBase): @classmethod def detect(cls, repository, manifest_data): prompt = 'Enter passphrase for %s: ' % repository._location.orig - meta, data, compressor, crypter, maccer, cipher = parser(manifest_data) + meta, data, compressor, keyer, maccer, cipher = parser(manifest_data) key = cls(compressor, maccer, cipher) passphrase = os.environ.get('ATTIC_PASSPHRASE') if passphrase is None: @@ -378,7 +388,7 @@ class KeyfileKey(AESKeyBase): @classmethod def detect(cls, repository, manifest_data): - meta, data, compressor, crypter, maccer, cipher = parser(manifest_data) + meta, data, compressor, keyer, maccer, cipher = parser(manifest_data) key = cls(compressor, maccer, cipher) path = cls.find_key_file(repository) prompt = 'Enter passphrase for key file %s: ' % path @@ -510,7 +520,7 @@ for preset in LzmaCompressor.PRESETS: type('LzmaCompressorPreset%d' % preset, (LzmaCompressor, ), dict(TYPE=LzmaCompressor.TYPE + preset)) -crypter_mapping = { +keyer_mapping = { KeyfileKey.TYPE: KeyfileKey, PassphraseKey.TYPE: PassphraseKey, PlaintextKey.TYPE: PlaintextKey, @@ -540,16 +550,16 @@ cipher_mapping = { def get_implementations(meta): try: compressor = compressor_mapping[meta.compr_type] - crypter = crypter_mapping[meta.crypt_type] + keyer = keyer_mapping[meta.key_type] maccer = maccer_mapping[meta.mac_type] cipher = cipher_mapping[meta.cipher_type] except KeyError: - raise UnsupportedPayloadError("compr_type %x crypt_type %x mac_type %x" % ( - meta.compr_type, meta.crypt_type, meta.mac_type, meta.cipher_type)) - return compressor, crypter, maccer, cipher + raise UnsupportedPayloadError("compr_type %x key_type %x mac_type %x" % ( + meta.compr_type, 
meta.key_type, meta.mac_type, meta.cipher_type)) + return compressor, keyer, maccer, cipher -def legacy_parser(all_data, crypt_type): # all rather hardcoded +def legacy_parser(all_data, key_type): # all rather hardcoded """ Payload layout: no encryption: TYPE(1) + data @@ -562,7 +572,7 @@ def legacy_parser(all_data, crypt_type): # all rather hardcoded only 295 exabytes! """ offset = 1 - if crypt_type == PlaintextKey.TYPE: + if key_type == PlaintextKey.TYPE: hmac = None iv = stored_iv = None data = all_data[offset:] @@ -570,11 +580,11 @@ def legacy_parser(all_data, crypt_type): # all rather hardcoded hmac = all_data[offset:offset+32] stored_iv = all_data[offset+32:offset+40] data = all_data[offset+40:] - meta = Meta(compr_type=6, crypt_type=crypt_type, + meta = Meta(compr_type=6, key_type=key_type, mac_type=HMAC_SHA256.TYPE, cipher_type=AES_CTR_HMAC.TYPE, hmac=hmac, stored_iv=stored_iv) - compressor, crypter, maccer, cipher = get_implementations(meta) - return meta, data, compressor, crypter, maccer, cipher + compressor, keyer, maccer, cipher = get_implementations(meta) + return meta, data, compressor, keyer, maccer, cipher def parser00(all_data): return legacy_parser(all_data, KeyfileKey.TYPE) @@ -601,8 +611,8 @@ def parser03(all_data): # new & flexible # more recent ones, not by 0.4.2. So, fix here when 0.4.6 is out. 
:-( meta_tuple, data = msgpack.unpackb(all_data[1:]) meta = Meta(*meta_tuple) - compressor, crypter, maccer, cipher = get_implementations(meta) - return meta, data, compressor, crypter, maccer, cipher + compressor, keyer, maccer, cipher = get_implementations(meta) + return meta, data, compressor, keyer, maccer, cipher def parser(data): @@ -618,8 +628,8 @@ def parser(data): def key_factory(repository, manifest_data): - meta, data, compressor, crypter, maccer, cipher = parser(manifest_data) - return crypter.detect(repository, manifest_data) + meta, data, compressor, keyer, maccer, cipher = parser(manifest_data) + return keyer.detect(repository, manifest_data) def generate(meta, data): From 6aca9383d7266d23071c494adb88bd691d59a505 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 14 Mar 2015 00:52:54 +0100 Subject: [PATCH 25/58] reintegrate AEAD cipher made from AES CTR + HMAC-SHA256 --- attic/crypto.pyx | 40 +++++++++++++++++++++++----------- attic/key.py | 45 +++++++++++++++++++++++++++++++-------- attic/testsuite/crypto.py | 22 +++++++++++++++---- 3 files changed, 82 insertions(+), 25 deletions(-) diff --git a/attic/crypto.pyx b/attic/crypto.pyx index 17161975..d92806f3 100644 --- a/attic/crypto.pyx +++ b/attic/crypto.pyx @@ -7,6 +7,9 @@ from libc.stdlib cimport malloc, free API_VERSION = 2 +AES_CTR_MODE = 1 +AES_GCM_MODE = 2 + TAG_SIZE = 16 # bytes; 128 bits is the maximum allowed value. see "hack" below. 
IV_SIZE = 16 # bytes; 128 bits @@ -25,6 +28,7 @@ cdef extern from "openssl/evp.h": ctypedef struct ENGINE: pass const EVP_MD *EVP_sha256() + const EVP_CIPHER *EVP_aes_256_ctr() const EVP_CIPHER *EVP_aes_256_gcm() void EVP_CIPHER_CTX_init(EVP_CIPHER_CTX *a) void EVP_CIPHER_CTX_cleanup(EVP_CIPHER_CTX *a) @@ -99,12 +103,19 @@ cdef class AES: """ cdef EVP_CIPHER_CTX ctx cdef int is_encrypt + cdef int mode - def __cinit__(self, is_encrypt, key, iv=None): + def __cinit__(self, mode, is_encrypt, key, iv=None): EVP_CIPHER_CTX_init(&self.ctx) + self.mode = mode self.is_encrypt = is_encrypt # Set cipher type and mode - cipher_mode = EVP_aes_256_gcm() + if mode == AES_CTR_MODE: + cipher_mode = EVP_aes_256_ctr() + elif mode == AES_GCM_MODE: + cipher_mode = EVP_aes_256_gcm() + else: + raise Exception('unknown mode') if self.is_encrypt: if not EVP_EncryptInit_ex(&self.ctx, cipher_mode, NULL, NULL, NULL): raise Exception('EVP_EncryptInit_ex failed') @@ -123,9 +134,10 @@ cdef class AES: key2 = key if iv: iv2 = iv - # Set IV length (bytes) - if not EVP_CIPHER_CTX_ctrl(&self.ctx, EVP_CTRL_GCM_SET_IVLEN, IV_SIZE, NULL): - raise Exception('EVP_CIPHER_CTX_ctrl SET IVLEN failed') + if self.mode == AES_GCM_MODE: + # Set IV length (bytes) + if not EVP_CIPHER_CTX_ctrl(&self.ctx, EVP_CTRL_GCM_SET_IVLEN, IV_SIZE, NULL): + raise Exception('EVP_CIPHER_CTX_ctrl SET IVLEN failed') # Initialise key and IV if self.is_encrypt: if not EVP_EncryptInit_ex(&self.ctx, NULL, NULL, key2, iv2): @@ -137,6 +149,8 @@ cdef class AES: def add(self, aad): cdef int aadl = len(aad) cdef int outl + if self.mode != AES_GCM_MODE: + raise Exception('additional data only supported for AES GCM mode') # Zero or more calls to specify any AAD if self.is_encrypt: if not EVP_EncryptUpdate(&self.ctx, NULL, &outl, aad, aadl): @@ -161,9 +175,10 @@ cdef class AES: if not EVP_EncryptFinal_ex(&self.ctx, out+ctl, &outl): raise Exception('EVP_EncryptFinal failed') ctl += outl - # Get tag - if not EVP_CIPHER_CTX_ctrl(&self.ctx, 
EVP_CTRL_GCM_GET_TAG, TAG_SIZE, tag): - raise Exception('EVP_CIPHER_CTX_ctrl GET TAG failed') + if self.mode == AES_GCM_MODE: + # Get tag (only GCM mode. for CTR, the returned tag is undefined) + if not EVP_CIPHER_CTX_ctrl(&self.ctx, EVP_CTRL_GCM_GET_TAG, TAG_SIZE, tag): + raise Exception('EVP_CIPHER_CTX_ctrl GET TAG failed') # hack: caller wants 32B tags (256b), so we give back that amount return (tag[:TAG_SIZE] + b'\x00'*16), out[:ctl] finally: @@ -184,11 +199,12 @@ cdef class AES: if not EVP_DecryptUpdate(&self.ctx, out, &outl, data, inl): raise Exception('EVP_DecryptUpdate failed') ptl = outl - # Set expected tag value. - if not EVP_CIPHER_CTX_ctrl(&self.ctx, EVP_CTRL_GCM_SET_TAG, TAG_SIZE, tag): - raise Exception('EVP_CIPHER_CTX_ctrl SET TAG failed') + if self.mode == AES_GCM_MODE: + # Set expected tag value. + if not EVP_CIPHER_CTX_ctrl(&self.ctx, EVP_CTRL_GCM_SET_TAG, TAG_SIZE, tag): + raise Exception('EVP_CIPHER_CTX_ctrl SET TAG failed') if EVP_DecryptFinal_ex(&self.ctx, out+ptl, &outl) <= 0: - # a failure here means corrupted / tampered tag or data + # for GCM mode, a failure here means corrupted / tampered tag or data raise Exception('EVP_DecryptFinal failed') ptl += outl return out[:ptl] diff --git a/attic/key.py b/attic/key.py index a810068f..10042e65 100644 --- a/attic/key.py +++ b/attic/key.py @@ -16,7 +16,8 @@ except ImportError: except ImportError: lzma = None -from attic.crypto import pbkdf2_sha256, get_random_bytes, AES, bytes_to_long, long_to_bytes, bytes_to_int, num_aes_blocks +from attic.crypto import pbkdf2_sha256, get_random_bytes, AES, AES_CTR_MODE, AES_GCM_MODE, \ + bytes_to_long, long_to_bytes, bytes_to_int, num_aes_blocks from attic.helpers import IntegrityError, get_keys_dir, Error # we do not store the full IV on disk, as the upper 8 bytes are expected to be @@ -124,7 +125,7 @@ class GMAC: self.data = data def digest(self): - mac_cipher = AES(is_encrypt=True, key=self.key, iv=b'\0' * 16) + mac_cipher = AES(mode=AES_GCM_MODE, 
is_encrypt=True, key=self.key, iv=b'\0' * 16) # GMAC = aes-gcm with all data as AAD, no data as to-be-encrypted data mac_cipher.add(bytes(self.data)) tag, _ = mac_cipher.compute_tag_and_encrypt(b'') @@ -187,7 +188,30 @@ class PLAIN: class AES_CTR_HMAC: TYPE = 1 - # TODO + + def __init__(self, enc_key=b'\0' * 32, enc_iv=b'\0' * 16, enc_hmac_key=b'\0' * 32, **kw): + self.hmac_key = enc_hmac_key + self.enc_iv = enc_iv + self.enc_cipher = AES(mode=AES_CTR_MODE, is_encrypt=True, key=enc_key, iv=enc_iv) + self.dec_cipher = AES(mode=AES_CTR_MODE, is_encrypt=False, key=enc_key) + + def compute_tag_and_encrypt(self, data): + self.enc_cipher.reset(iv=self.enc_iv) + iv_last8 = self.enc_iv[8:] + _, data = self.enc_cipher.compute_tag_and_encrypt(data) + # increase the IV (counter) value so same value is never used twice + current_iv = bytes_to_long(iv_last8) + self.enc_iv = PREFIX + long_to_bytes(current_iv + num_aes_blocks(len(data))) + tag = HMAC(self.hmac_key, iv_last8 + data, sha256).digest() # XXX mac / hash flexibility + return tag, iv_last8, data + + def check_tag_and_decrypt(self, tag, iv_last8, data): + iv = PREFIX + iv_last8 + if HMAC(self.hmac_key, iv_last8 + data, sha256).digest() != tag: + raise IntegrityError('Encryption envelope checksum mismatch') + self.dec_cipher.reset(iv=iv) + data = self.dec_cipher.check_tag_and_decrypt(None, data) + return data class AES_GCM: @@ -196,8 +220,8 @@ class AES_GCM: def __init__(self, enc_key=b'\0' * 32, enc_iv=b'\0' * 16, **kw): # note: hmac_key is not used for aes-gcm, it does aes+gmac in 1 pass self.enc_iv = enc_iv - self.enc_cipher = AES(is_encrypt=True, key=enc_key, iv=enc_iv) - self.dec_cipher = AES(is_encrypt=False, key=enc_key) + self.enc_cipher = AES(mode=AES_GCM_MODE, is_encrypt=True, key=enc_key, iv=enc_iv) + self.dec_cipher = AES(mode=AES_GCM_MODE, is_encrypt=False, key=enc_key) def compute_tag_and_encrypt(self, data): self.enc_cipher.reset(iv=self.enc_iv) @@ -292,9 +316,10 @@ class PlaintextKey(KeyBase): class 
AESKeyBase(KeyBase): """Common base class shared by KeyfileKey and PassphraseKey - Chunks are encrypted using 256bit AES in Galois Counter Mode (GCM) + Chunks are encrypted using 256bit AES in CTR or GCM mode. + Chunks are authenticated by a GCM GMAC or a HMAC. - Payload layout: TYPE(1) + TAG(32) + NONCE(8) + CIPHERTEXT + Payload layout: TYPE(1) + MAC(32) + NONCE(8) + CIPHERTEXT To reduce payload size only 8 bytes of the 16 bytes nonce is saved in the payload, the first 8 bytes are always zeros. This does not @@ -433,7 +458,8 @@ class KeyfileKey(AESKeyBase): assert d[b'algorithm'] == b'gmac' key = pbkdf2_sha256(passphrase.encode('utf-8'), d[b'salt'], d[b'iterations'], 32) try: - data = AES(is_encrypt=False, key=key, iv=b'\0'*16).check_tag_and_decrypt(d[b'hash'], d[b'data']) + cipher = AES(mode=AES_GCM_MODE, is_encrypt=False, key=key, iv=b'\0'*16) + data = cipher.check_tag_and_decrypt(d[b'hash'], d[b'data']) return data except Exception: return None @@ -442,7 +468,8 @@ class KeyfileKey(AESKeyBase): salt = get_random_bytes(32) iterations = 100000 key = pbkdf2_sha256(passphrase.encode('utf-8'), salt, iterations, 32) - tag, cdata = AES(is_encrypt=True, key=key, iv=b'\0'*16).compute_tag_and_encrypt(data) + cipher = AES(mode=AES_GCM_MODE, is_encrypt=True, key=key, iv=b'\0'*16) + tag, cdata = cipher.compute_tag_and_encrypt(data) d = { 'version': 1, 'salt': salt, diff --git a/attic/testsuite/crypto.py b/attic/testsuite/crypto.py index f4b9a9cb..a4e6d80f 100644 --- a/attic/testsuite/crypto.py +++ b/attic/testsuite/crypto.py @@ -1,6 +1,7 @@ from binascii import hexlify from attic.testsuite import AtticTestCase -from attic.crypto import AES, bytes_to_long, bytes_to_int, long_to_bytes, pbkdf2_sha256, get_random_bytes +from attic.crypto import pbkdf2_sha256, get_random_bytes, AES, AES_GCM_MODE, AES_CTR_MODE, \ + bytes_to_long, bytes_to_int, long_to_bytes class CryptoTestCase(AtticTestCase): @@ -27,20 +28,33 @@ class CryptoTestCase(AtticTestCase): self.assert_equal(len(bytes2), 
10) self.assert_not_equal(bytes, bytes2) + def test_aes_ctr(self): + key = b'X' * 32 + iv = b'\0' * 16 + data = b'foo' * 10 + # encrypt + aes = AES(mode=AES_CTR_MODE, is_encrypt=True, key=key, iv=iv) + _, cdata = aes.compute_tag_and_encrypt(data) + self.assert_equal(hexlify(cdata), b'c6efb702de12498f34a2c2bbc8149e759996d08bf6dc5c610aefc0c3a466') + # decrypt (correct tag/cdata) + aes = AES(mode=AES_CTR_MODE, is_encrypt=False, key=key, iv=iv) + pdata = aes.check_tag_and_decrypt(None, cdata) + self.assert_equal(data, pdata) + def test_aes_gcm(self): key = b'X' * 32 iv = b'A' * 16 data = b'foo' * 10 # encrypt - aes = AES(is_encrypt=True, key=key, iv=iv) + aes = AES(mode=AES_GCM_MODE, is_encrypt=True, key=key, iv=iv) tag, cdata = aes.compute_tag_and_encrypt(data) self.assert_equal(hexlify(tag), b'c98aa10eb6b7031bcc2160878d9438fb00000000000000000000000000000000') self.assert_equal(hexlify(cdata), b'841bcce405df769d22ee9f7f012edf5dc7fb2594d924c7400ffd050f2741') # decrypt (correct tag/cdata) - aes = AES(is_encrypt=False, key=key, iv=iv) + aes = AES(mode=AES_GCM_MODE, is_encrypt=False, key=key, iv=iv) pdata = aes.check_tag_and_decrypt(tag, cdata) self.assert_equal(data, pdata) # decrypt (incorrect tag/cdata) - aes = AES(is_encrypt=False, key=key, iv=iv) + aes = AES(mode=AES_GCM_MODE, is_encrypt=False, key=key, iv=iv) cdata = b'x' + cdata[1:] # corrupt cdata self.assertRaises(Exception, aes.check_tag_and_decrypt, tag, cdata) From 8a36478287197f2b57e82773afd9e7ca6a0387e9 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 14 Mar 2015 02:48:54 +0100 Subject: [PATCH 26/58] implement GMAC based on GHASH, use GHASH, faster compression, comments note: zlib level 0 is even faster, but crashes one RemoteArchiver unittest, huh!? 
--- attic/key.py | 37 ++++++++++++++++++++++++++----------- attic/testsuite/key.py | 2 +- 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/attic/key.py b/attic/key.py index 10042e65..9f69e2b4 100644 --- a/attic/key.py +++ b/attic/key.py @@ -97,6 +97,24 @@ class SHA512_256(sha512_256): super().__init__(data) +class GHASH: + TYPE = 2 + + def __init__(self, key, data): + # signature is like for a MAC, we ignore the key as this is a simple hash + if key is not None: + raise Exception("use a MAC if you have a key") + self.key = b'\0' * 32 + self.data = data + + def digest(self): + mac_cipher = AES(mode=AES_GCM_MODE, is_encrypt=True, key=self.key, iv=b'\0' * 16) + # GMAC = aes-gcm with all data as AAD, no data as to-be-encrypted data + mac_cipher.add(bytes(self.data)) + tag, _ = mac_cipher.compute_tag_and_encrypt(b'') + return tag + + class HMAC_SHA256(HMAC): TYPE = 10 @@ -115,24 +133,18 @@ class HMAC_SHA512_256(HMAC): super().__init__(key, data, sha512_256) -class GMAC: +class GMAC(GHASH): TYPE = 20 def __init__(self, key, data): + super().__init__(None, data) if key is None: raise Exception("do not use GMAC if you don't have a key") self.key = key - self.data = data - - def digest(self): - mac_cipher = AES(mode=AES_GCM_MODE, is_encrypt=True, key=self.key, iv=b'\0' * 16) - # GMAC = aes-gcm with all data as AAD, no data as to-be-encrypted data - mac_cipher.add(bytes(self.data)) - tag, _ = mac_cipher.compute_tag_and_encrypt(b'') - return tag -HASH_DEFAULT = SHA256.TYPE +# defaults are optimized for speed on modern CPUs with AES hw support +HASH_DEFAULT = GHASH.TYPE MAC_DEFAULT = GMAC.TYPE @@ -167,7 +179,8 @@ class LzmaCompressor(object): # uses 10..19 in the mapping return lzma.decompress(data) -COMPR_DEFAULT = ZlibCompressor.TYPE + 6 # zlib level 6 +# default is optimized for speed (and a little compression) +COMPR_DEFAULT = ZlibCompressor.TYPE + 1 # zlib level 1 # ciphers - AEAD (authenticated encryption with assoc. 
data) style interface @@ -244,6 +257,7 @@ class AES_GCM: return data +# cipher default is optimized for speed on modern CPUs with AES hw support PLAIN_DEFAULT = PLAIN.TYPE CIPHER_DEFAULT = AES_GCM.TYPE @@ -558,6 +572,7 @@ maccer_mapping = { # simple hashes, not MACs (but MAC-like class __init__ method signature): SHA256.TYPE: SHA256, SHA512_256.TYPE: SHA512_256, + GHASH.TYPE: GHASH, # MACs: HMAC_SHA256.TYPE: HMAC_SHA256, HMAC_SHA512_256.TYPE: HMAC_SHA512_256, diff --git a/attic/testsuite/key.py b/attic/testsuite/key.py index 58b1e27d..5d6b1517 100644 --- a/attic/testsuite/key.py +++ b/attic/testsuite/key.py @@ -59,7 +59,7 @@ QQWJNlInT3+yKnQpdqd2ZXJzaW9uAQ==""".strip() def test_plaintext(self): key = PlaintextKey.create(None, self.MockArgs()) data = b'foo' - self.assert_equal(hexlify(key.id_hash(data)), b'2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae') + self.assert_equal(hexlify(key.id_hash(data)), b'4c9137bc0dd3ddb31de4e138a49d7eb300000000000000000000000000000000') self.assert_equal(data, key.decrypt(key.id_hash(data), key.encrypt(data))) def test_keyfile(self): From 99daa2794d684969f4cb63212bbed574051c70e5 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 14 Mar 2015 22:06:18 +0100 Subject: [PATCH 27/58] for zero compression (and decompression), avoid going through zlib for level 0, zlib.compress is still at some cost, it doesn't just give back the data 1:1. in fact, it even does add some overhead, creating larger output than input. thus, it is replaced by NullCompressor, which really does nothing. --- attic/archiver.py | 5 +++-- attic/key.py | 18 +++++++++++++++--- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/attic/archiver.py b/attic/archiver.py index 1a84ce52..9c37adfa 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -484,8 +484,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") --compression METHODs (default: %02d): - - 00..09 zlib levels 0..9 (0 means no compression, 9 max. 
compression) - - 10..19 lzma levels 0..9 (0 means no compression, 9 max. compression) + - 00 no compression + - 01..09 zlib levels 1..9 (1 means low compression, 9 max. compression) + - 10..19 lzma levels 0..9 (0 means low compression, 9 max. compression) --cipher METHODs (default: %02d or %02d) diff --git a/attic/key.py b/attic/key.py index 9f69e2b4..c1096c31 100644 --- a/attic/key.py +++ b/attic/key.py @@ -151,7 +151,17 @@ MAC_DEFAULT = GMAC.TYPE # compressor classes, all same interface # special case: zlib level 0 is "no compression" -class ZlibCompressor(object): # uses 0..9 in the mapping +class NullCompressor(object): # uses 0 in the mapping + TYPE = 0 + + def compress(self, data): + return bytes(data) + + def decompress(self, data): + return bytes(data) + + +class ZlibCompressor(object): # uses 1..9 in the mapping TYPE = 0 LEVELS = range(10) @@ -179,8 +189,8 @@ class LzmaCompressor(object): # uses 10..19 in the mapping return lzma.decompress(data) -# default is optimized for speed (and a little compression) -COMPR_DEFAULT = ZlibCompressor.TYPE + 1 # zlib level 1 +# default is optimized for speed +COMPR_DEFAULT = NullCompressor.TYPE # no compression # ciphers - AEAD (authenticated encryption with assoc. data) style interface @@ -559,6 +569,8 @@ for level in ZlibCompressor.LEVELS: for preset in LzmaCompressor.PRESETS: compressor_mapping[LzmaCompressor.TYPE + preset] = \ type('LzmaCompressorPreset%d' % preset, (LzmaCompressor, ), dict(TYPE=LzmaCompressor.TYPE + preset)) +# overwrite 0 with NullCompressor +compressor_mapping[NullCompressor.TYPE] = NullCompressor keyer_mapping = { From 61f84efa602a03567b63adf0b0f4d072bce5f1d4 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 15 Mar 2015 01:52:56 +0100 Subject: [PATCH 28/58] misc. 
small fixes, discovered by pycharm removed unused import fixed format string --- attic/key.py | 2 +- attic/testsuite/key.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/attic/key.py b/attic/key.py index c1096c31..0a9bad46 100644 --- a/attic/key.py +++ b/attic/key.py @@ -608,7 +608,7 @@ def get_implementations(meta): maccer = maccer_mapping[meta.mac_type] cipher = cipher_mapping[meta.cipher_type] except KeyError: - raise UnsupportedPayloadError("compr_type %x key_type %x mac_type %x" % ( + raise UnsupportedPayloadError("compr_type %x key_type %x mac_type %x cipher_type %x" % ( meta.compr_type, meta.key_type, meta.mac_type, meta.cipher_type)) return compressor, keyer, maccer, cipher diff --git a/attic/testsuite/key.py b/attic/testsuite/key.py index 5d6b1517..b70939d6 100644 --- a/attic/testsuite/key.py +++ b/attic/testsuite/key.py @@ -3,7 +3,7 @@ import re import shutil import tempfile from binascii import hexlify -from attic.crypto import bytes_to_long, num_aes_blocks +from attic.crypto import bytes_to_long from attic.testsuite import AtticTestCase from attic.key import PlaintextKey, PassphraseKey, KeyfileKey, COMPR_DEFAULT from attic.helpers import Location, unhexlify From b275d18c6be480e2444703ee227aab6dd8c2f35d Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 15 Mar 2015 04:45:52 +0100 Subject: [PATCH 29/58] msgpack: limit unpacker, use bin type for bytes we only need a little metadata and 1 medium sized piece of data, so avoid memory allocation issues that could be caused by tampered input data. bin type is more appropriate for binary data than str type (which could be also encoded text). 
--- attic/key.py | 21 ++++++++++++++------- attic/testsuite/key.py | 20 ++++++++++---------- 2 files changed, 24 insertions(+), 17 deletions(-) diff --git a/attic/key.py b/attic/key.py index 0a9bad46..5c26e166 100644 --- a/attic/key.py +++ b/attic/key.py @@ -658,12 +658,19 @@ def parser03(all_data): # new & flexible meta is a Meta namedtuple and contains all required information about data. data is maybe compressed (see meta) and maybe encrypted (see meta). """ - # TODO use Unpacker(..., max_*_len=NOTMORETHANNEEDED) to avoid any memory - # allocation issues on untrusted and potentially tampered input data. - # Problem: we currently must use older msgpack because pure python impl. - # is broken in 0.4.2 < version <= 0.4.5, but this api is only offered by - # more recent ones, not by 0.4.2. So, fix here when 0.4.6 is out. :-( - meta_tuple, data = msgpack.unpackb(all_data[1:]) + max_len = 2000000 # XXX formula? + unpacker = msgpack.Unpacker( + use_list=False, + # avoid memory allocation issues causes by tampered input data. 
+ max_buffer_size=max_len, # does not work in 0.4.6 unpackb C implementation + max_array_len=10, # meta_tuple + max_bin_len=max_len, # data + max_str_len=0, # not used yet + max_map_len=0, # not used yet + max_ext_len=0, # not used yet + ) + unpacker.feed(all_data[1:]) + meta_tuple, data = unpacker.unpack() meta = Meta(*meta_tuple) compressor, keyer, maccer, cipher = get_implementations(meta) return meta, data, compressor, keyer, maccer, cipher @@ -688,7 +695,7 @@ def key_factory(repository, manifest_data): def generate(meta, data): # always create new-style 0x03 format - return b'\x03' + msgpack.packb((meta, data)) + return b'\x03' + msgpack.packb((meta, data), use_bin_type=True) def compressor_creator(args): diff --git a/attic/testsuite/key.py b/attic/testsuite/key.py index b70939d6..76129402 100644 --- a/attic/testsuite/key.py +++ b/attic/testsuite/key.py @@ -19,19 +19,19 @@ class KeyTestCase(AtticTestCase): keyfile2_key_file = """ ATTIC KEY 0000000000000000000000000000000000000000000000000000000000000000 -hqppdGVyYXRpb25zzgABhqCpYWxnb3JpdGhtpGdtYWOkaGFzaNoAII1CqUnJzgKISX3lwR -+wWqMAAAAAAAAAAAAAAAAAAAAApGRhdGHaANBGe/oYLxHbAq72vjwEpgNMV73dTMkZkYh4 -0WtFC65DwZmqvwbwBBaq1g+fiym+khRtrn9hZvF6rpjk0RrAURSxCXIt/XUNQzQlcQjYbb -kTT0aFk3DkKbwA/pgx10s/nWBmz9xv4yT5uoewOdPV009nJnrLdIz1zJTPvy2ylejHF3Na -Sy/B/tWA9PIeRZzrDe/lVY6YBs8lKz1jtT/3vCJFCa+LOSSJHV+tExnpgO0NBTxDmTckRe -vk3IRPVUml5VXHoUYEUEj6QpBA2F4NKdSzpHNhbHTaACDh3gxO3vgi+K/KMmBebec6RhBy -QQWJNlInT3+yKnQpdqd2ZXJzaW9uAQ==""".strip() +hqlhbGdvcml0aG2kZ21hY6d2ZXJzaW9uAaRoYXNo2gAgeXkW700i+1t5mroRI9YQuAAAAA +AAAAAAAAAAAAAAAACkZGF0YdoA0FVh2YsC4Nd5Pd+9wm6m/HbXnfy7ahBQNUp/grFY/LN7 +CPZYHM9tblJ40Kklnn6pktJhgEizgOzK435wbRWeuYiLO4+W0AEX74i0GcFafOhN7DyLYA +jE1qQMTm7tK2LlapnKVOOiH3KV67pdSMtRYDrHbx0Gud3jBtfMGU39nuwEFfWwIzQ8b4Tm +SWlG6orGwmvRJn8a5H+JtOY90e+tM7s2M4VF6p8grtUyighYxJrO4Y78/fsDpSHbYAh+en +6GrpcESLKYoDtgqiyjle0LpQ6kc2FsdNoAIKhlgtF1As4InTAsR3bCQif78vGjYYMKerJQ +ge5ZaKvpqml0ZXJhdGlvbnPOAAGGoA==""".strip() keyfile2_cdata = 
unhexlify(re.sub('\W', '', """ - 03929606001402da002046c635e7ce41b65c5c075fa6afb97f5100000000000000000000000000000000 - a80000000000000000affb14944408753093ba2860edb49220 + 03929600001402c4207f9b12b337e123e322ca2af795788ee100000000000000000000000000000000 + c4080000000000000000c407624711de25ab38 """)) - keyfile2_id = unhexlify('94899966ce3eaad825f37500c8c87ef100000000000000000000000000000000') + keyfile2_id = unhexlify('4d532cec0eb8ec34d65c5491b5158b1400000000000000000000000000000000') def setUp(self): self.tmppath = tempfile.mkdtemp() From 1e21ad85b4979378e924e00b4493a82e9316da0e Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 15 Mar 2015 05:05:19 +0100 Subject: [PATCH 30/58] improve attic init --help --- attic/archiver.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/attic/archiver.py b/attic/archiver.py index 9c37adfa..e4e35585 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -479,7 +479,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") init_epilog = textwrap.dedent(""" This command initializes an empty repository. A repository is a filesystem directory containing the deduplicated data from zero or more archives. - Encryption can be enabled, compression and mac method can be chosen at + Encryption can be enabled, compression, cipher and mac method can be chosen at repository init time. 
--compression METHODs (default: %02d): @@ -496,11 +496,12 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") --mac METHODs (default: %02d or %02d): - - 00 sha256 (just simple hash, no MAC, faster on 32bit CPU) - - 01 sha512-256 (just simple hash, no MAC, faster on 64bit CPU) - - 10 hmac-sha256 (HMAC, faster on 32bit CPU) - - 11 hmac-sha512-256 (HMAC, faster on 64bit CPU) - - 20 gmac (MAC, fastest on CPUs with AES-GCM HW support) + - 00 sha256 (simple hash, no MAC, faster on 32bit CPU) + - 01 sha512-256 (simple hash, no MAC, faster on 64bit CPU) + - 02 ghash (simple hash, no MAC, fastest on CPUs with AES-GCM support) + - 10 hmac-sha256 (MAC, faster on 32bit CPU) + - 11 hmac-sha512-256 (MAC, faster on 64bit CPU) + - 20 gmac (MAC, fastest on CPUs with AES-GCM support) """ % (COMPR_DEFAULT, PLAIN_DEFAULT, CIPHER_DEFAULT, HASH_DEFAULT, MAC_DEFAULT)) subparser = subparsers.add_parser('init', parents=[common_parser], description=self.do_init.__doc__, epilog=init_epilog, @@ -511,7 +512,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") help='repository to create') subparser.add_argument('-e', '--encryption', dest='encryption', choices=('none', 'passphrase', 'keyfile'), default='none', - help='select encryption method') + help='select encryption key method') subparser.add_argument('-C', '--cipher', dest='cipher', type=int, default=None, metavar='METHOD', help='select cipher (0..2)') From 30f48879da94730e53f033837e087df5fa33d952 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 15 Mar 2015 14:27:47 +0100 Subject: [PATCH 31/58] add tuning docs --- docs/index.rst | 1 + docs/tuning.rst | 139 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 140 insertions(+) create mode 100644 docs/tuning.rst diff --git a/docs/index.rst b/docs/index.rst index 711eaf15..c5dd9906 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -50,6 +50,7 @@ User's Guide quickstart usage faq + tuning internals Getting help diff --git 
a/docs/tuning.rst b/docs/tuning.rst new file mode 100644 index 00000000..a6f3a49c --- /dev/null +++ b/docs/tuning.rst @@ -0,0 +1,139 @@ +.. _tuning: +.. include:: global.rst.inc + +Tuning +====== + +General hints +------------- +CPU load, backup speed, memory and storage usage are covered below. + +As performance and resource usage depend on a lot of factors, you may need to +tweak the parameters a bit and retry until you found the best ones for your +setup. + +Usually, the default parameters are selected for best speed under the assumption +that you run a modern machine with fast CPU, fast I/O and a good amount of RAM. + +If you run an older or low-resource machine or your backup target or connection +to it is slow, tweaking parameters might give significant speedups. + +Exclude crap data +----------------- +Maybe you don't want to backup: + +* cache / temporary files (they can be rebuilt / are useless) +* specific directories / filenames / file extensions you do not need +* backups (some people make backups of backups...) + +You can exclude these, so they don't waste time and space. + +Speed (in general) +------------------ +Keep an eye on CPU and I/O bounds. Try to find the sweet spot in the middle +where it is not too much I/O bound and not too much CPU bound. + +I/O bound +~~~~~~~~~ +If CPU load does not sum up to 1 core fully loaded while backing up, the +process is likely I/O bound (can't read or write data fast enough). + +Maybe you want to try higher compression then so it has less data to write. +Or get faster I/O, if possible. + +CPU bound +~~~~~~~~~ +If you have 1 core fully loaded most of the time, but your backup seems slow, +the process is likely CPU bound (can't compute fast enough). + +Maybe you want to try lower compression then so it has less to compute. +Using a faster MAC or cipher method might also be an option. +Or get a faster CPU. 
+ +I/O speed +--------- +From fast to slower: + +* fast local filesystem, SSD or HDD, via PCIe, SATA, USB +* ssh connection to a remote server's attic instance +* mounted network filesystems of a remote server + +Not only throughput influences timing, latency does too. + +Backup space needed +------------------- +If you mostly back up the same data every time, you will often save a lot of space +due to deduplication - this works independently of compression. + +To avoid running out of space, regularly prune your backup archives according +to your needs. Backups of the same machine that are close in time are usually +very cheap (because most data is the same and gets deduplicated). + +Compression +----------- +If you have a fast backup source and destination and you are not low on backup space: +Switch off compression, your backup will run faster and with less CPU load. + +If you just want to save a bit of space, but stay relatively fast: +Try zlib level 1. + +If you have a very slow source or destination (e.g. a remote backup space via a +network connection that is much slower than your local and remote storage): +Try a higher zlib level or lzma. + +Authentication & MAC selection +------------------------------ +Real MACs (Message Authentication Codes) can only be used when a secret key is +available. A MAC authenticates your backup data and can detect malicious tampering. +Without a key, a simple hash will be used (which helps to detect accidental +data corruption, but cannot detect malicious data tampering). + +Older or simple 32bit machine architecture +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Use sha256 (no key) or hmac-sha256 (key). + +64bit architecture, but no AES hardware acceleration in the CPU +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Use sha512-256 (no key) or hmac-sha512-256 (key). + +Modern 64bit CPU with AES hardware acceleration (AES-NI, PCLMULQDQ) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Use ghash (no key) or gmac (key). 
+ +Encryption & Cipher selection +----------------------------- +Always encrypt your backups (and keep passphrase and key file [if any] safe). + +The cipher selection chooses between various AEAD ciphers (authenticated +encryption with associated data), it is EtM (encrypt-then-MAC): + +Older or simple 32bit machine architecture +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Use aes256-ctr + hmac-sha256. + +64bit architecture, but no AES hardware acceleration in the CPU +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Use aes256-ctr + hmac-sha512-256. + +Modern 64bit CPU with AES hardware acceleration (AES-NI, PCLMULQDQ) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Use aes256-gcm (AEAD 1-pass cipher). + +RAM usage +--------- +Depending on the number of files and chunks in the repository, memory usage +varies: + +* about 250+B RAM per file (for "files" cache) +* about 44B RAM per 64kiB chunk (for "chunks" cache) +* about 40B RAM per 64kiB chunk (for repository index, if remote repo is used, + this will be allocated on remote side) + +If you run into memory usage issues, your options are: + +* get more RAM (or more swap space, speed will be slower) +* disable the "files" cache, speed will be slower +* have fewer files / chunks per repo + +Note: RAM compression likely won't help as a lot of that data is using +msgpack, which is already rather efficient. From f767d5803e3e6379bdccc57eb13fc423060a6097 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 18 Mar 2015 21:37:12 +0100 Subject: [PATCH 32/58] fix max unpack length --- attic/key.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/attic/key.py b/attic/key.py index 5c26e166..6f480e87 100644 --- a/attic/key.py +++ b/attic/key.py @@ -658,7 +658,7 @@ def parser03(all_data): # new & flexible meta is a Meta namedtuple and contains all required information about data. data is maybe compressed (see meta) and maybe encrypted (see meta). 
""" - max_len = 2000000 # XXX formula? + max_len = 10000000 # XXX formula? unpacker = msgpack.Unpacker( use_list=False, # avoid memory allocation issues causes by tampered input data. From 4f7c8326b7da5d1f7529efe3e17c8990be8a87d8 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 19 Mar 2015 01:26:56 +0100 Subject: [PATCH 33/58] tuning: add a note about scrolling --- docs/tuning.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/tuning.rst b/docs/tuning.rst index a6f3a49c..746127f9 100644 --- a/docs/tuning.rst +++ b/docs/tuning.rst @@ -28,6 +28,14 @@ Maybe you don't want to backup: You can exclude these, so they don't waste time and space. +Avoid scrolling +--------------- +If you do benchmarks, avoid creating a lot of log output, especially if it +means scrolling text in a window on a graphical user interface. + +Rather use much less log output or at least redirect the output to a log file, +that is also much faster than scrolling. + Speed (in general) ------------------ Keep an eye on CPU and I/O bounds. Try to find the sweet spot in the middle From 2ac57f2d12572309da82169b867339e144710100 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 22 Mar 2015 14:56:22 +0100 Subject: [PATCH 34/58] add condensed description of the changes in merge-all branch --- CHANGES-merge-all.txt | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 CHANGES-merge-all.txt diff --git a/CHANGES-merge-all.txt b/CHANGES-merge-all.txt new file mode 100644 index 00000000..6824a783 --- /dev/null +++ b/CHANGES-merge-all.txt @@ -0,0 +1,23 @@ +Stuff in merge-all and not in merge minus minor changes: + +added tuning docs + +attic init --compression NN --cipher NN --mac NN ... 
+(see attic init --help) + +new hashes: sha512_256 + ghash (default) +new MACs: hmac-sha512-256 + gmac (default) +new ciphers: aes256-ctr + hmac-sha512-256 + aes256-gcm (default) +new compression: no compression (default) + zlib level 1..9 (previously, level 6 was hardcoded) + lzma preset 0..9 + +source: more flexible type 0x03 header format, allowing to give hash algo, +compression algo and level, encryption algo, key type. + +source: less hardcoding, numeric offsets / lengths +source: flexible hashing, compression, encryption, key dispatching + From af1c8c1b92c7e5a07b9ef5622dc0a7c471c931ee Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 22 Mar 2015 20:10:43 +0100 Subject: [PATCH 35/58] refactor parser code, just parse, simplify return tuple --- attic/key.py | 24 +++++++++++++----------- attic/testsuite/archiver.py | 2 +- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/attic/key.py b/attic/key.py index 6f480e87..902e6b7a 100644 --- a/attic/key.py +++ b/attic/key.py @@ -307,7 +307,8 @@ class KeyBase(object): return generate(meta, data) def decrypt(self, id, data): - meta, data, compressor, keyer, maccer, cipher = parser(data) + meta, data = parser(data) + compressor, keyer, maccer, cipher = get_implementations(meta) assert isinstance(self, keyer) assert self.maccer_cls is maccer assert self.cipher_cls is cipher @@ -333,7 +334,8 @@ class PlaintextKey(KeyBase): @classmethod def detect(cls, repository, manifest_data): - meta, data, compressor, keyer, maccer, cipher = parser(manifest_data) + meta, data = parser(manifest_data) + compressor, keyer, maccer, cipher = get_implementations(meta) return cls(compressor, maccer, cipher) @@ -351,8 +353,7 @@ class AESKeyBase(KeyBase): only 295 exabytes! 
""" def extract_nonce(self, payload): - meta, data, compressor, keyer, maccer, cipher = parser(payload) - assert isinstance(self, keyer) + meta, data = parser(payload) nonce = bytes_to_long(meta.stored_iv) return nonce @@ -405,7 +406,8 @@ class PassphraseKey(AESKeyBase): @classmethod def detect(cls, repository, manifest_data): prompt = 'Enter passphrase for %s: ' % repository._location.orig - meta, data, compressor, keyer, maccer, cipher = parser(manifest_data) + meta, data = parser(manifest_data) + compressor, keyer, maccer, cipher = get_implementations(meta) key = cls(compressor, maccer, cipher) passphrase = os.environ.get('ATTIC_PASSPHRASE') if passphrase is None: @@ -437,7 +439,8 @@ class KeyfileKey(AESKeyBase): @classmethod def detect(cls, repository, manifest_data): - meta, data, compressor, keyer, maccer, cipher = parser(manifest_data) + meta, data = parser(manifest_data) + compressor, keyer, maccer, cipher = get_implementations(meta) key = cls(compressor, maccer, cipher) path = cls.find_key_file(repository) prompt = 'Enter passphrase for key file %s: ' % path @@ -637,8 +640,7 @@ def legacy_parser(all_data, key_type): # all rather hardcoded meta = Meta(compr_type=6, key_type=key_type, mac_type=HMAC_SHA256.TYPE, cipher_type=AES_CTR_HMAC.TYPE, hmac=hmac, stored_iv=stored_iv) - compressor, keyer, maccer, cipher = get_implementations(meta) - return meta, data, compressor, keyer, maccer, cipher + return meta, data def parser00(all_data): return legacy_parser(all_data, KeyfileKey.TYPE) @@ -672,8 +674,7 @@ def parser03(all_data): # new & flexible unpacker.feed(all_data[1:]) meta_tuple, data = unpacker.unpack() meta = Meta(*meta_tuple) - compressor, keyer, maccer, cipher = get_implementations(meta) - return meta, data, compressor, keyer, maccer, cipher + return meta, data def parser(data): @@ -689,7 +690,8 @@ def parser(data): def key_factory(repository, manifest_data): - meta, data, compressor, keyer, maccer, cipher = parser(manifest_data) + meta, data = 
parser(manifest_data) + compressor, keyer, maccer, cipher = get_implementations(meta) return keyer.detect(repository, manifest_data) diff --git a/attic/testsuite/archiver.py b/attic/testsuite/archiver.py index 4d7f40d3..01e0bccd 100644 --- a/attic/testsuite/archiver.py +++ b/attic/testsuite/archiver.py @@ -383,7 +383,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): hash = sha256(data).digest() if hash not in seen: seen.add(hash) - meta, data, _, _, _, _ = parser(data) + meta, data = parser(data) num_blocks = num_aes_blocks(len(data)) nonce = bytes_to_long(meta.stored_iv) for counter in range(nonce, nonce + num_blocks): From add6bd96e769ca59d1a3b81fe53bae1d52e08072 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 22 Mar 2015 21:04:26 +0100 Subject: [PATCH 36/58] move (h)mac out of meta, so we could include the whole meta into mac computation note: - incompatible to previous storage format of merge-all branch - compatible to master branch / official attic --- attic/key.py | 42 ++++++++++++++++++------------------- attic/testsuite/archiver.py | 2 +- attic/testsuite/key.py | 20 +++++++++--------- 3 files changed, 32 insertions(+), 32 deletions(-) diff --git a/attic/key.py b/attic/key.py index 902e6b7a..5269164f 100644 --- a/attic/key.py +++ b/attic/key.py @@ -24,7 +24,7 @@ from attic.helpers import IntegrityError, get_keys_dir, Error # zero anyway as the full IV is a 128bit counter. PREFIX are the upper 8 bytes, # stored_iv are the lower 8 Bytes. 
PREFIX = b'\0' * 8 -Meta = namedtuple('Meta', 'compr_type, key_type, mac_type, cipher_type, hmac, stored_iv') +Meta = namedtuple('Meta', 'compr_type, key_type, mac_type, cipher_type, stored_iv') class UnsupportedPayloadError(Error): @@ -300,19 +300,19 @@ class KeyBase(object): def encrypt(self, data): data = self.compressor.compress(data) - tag, iv_last8, data = self.cipher.compute_tag_and_encrypt(data) + mac, iv_last8, data = self.cipher.compute_tag_and_encrypt(data) meta = Meta(compr_type=self.compressor.TYPE, key_type=self.TYPE, mac_type=self.maccer_cls.TYPE, cipher_type=self.cipher.TYPE, - hmac=tag, stored_iv=iv_last8) - return generate(meta, data) + stored_iv=iv_last8) + return generate(mac, meta, data) def decrypt(self, id, data): - meta, data = parser(data) + mac, meta, data = parser(data) compressor, keyer, maccer, cipher = get_implementations(meta) assert isinstance(self, keyer) assert self.maccer_cls is maccer assert self.cipher_cls is cipher - data = self.cipher.check_tag_and_decrypt(meta.hmac, meta.stored_iv, data) + data = self.cipher.check_tag_and_decrypt(mac, meta.stored_iv, data) data = self.compressor.decompress(data) if id and self.id_hash(data) != id: raise IntegrityError('Chunk id verification failed') @@ -334,7 +334,7 @@ class PlaintextKey(KeyBase): @classmethod def detect(cls, repository, manifest_data): - meta, data = parser(manifest_data) + mac, meta, data = parser(manifest_data) compressor, keyer, maccer, cipher = get_implementations(meta) return cls(compressor, maccer, cipher) @@ -353,7 +353,7 @@ class AESKeyBase(KeyBase): only 295 exabytes! 
""" def extract_nonce(self, payload): - meta, data = parser(payload) + mac, meta, data = parser(payload) nonce = bytes_to_long(meta.stored_iv) return nonce @@ -406,7 +406,7 @@ class PassphraseKey(AESKeyBase): @classmethod def detect(cls, repository, manifest_data): prompt = 'Enter passphrase for %s: ' % repository._location.orig - meta, data = parser(manifest_data) + mac, meta, data = parser(manifest_data) compressor, keyer, maccer, cipher = get_implementations(meta) key = cls(compressor, maccer, cipher) passphrase = os.environ.get('ATTIC_PASSPHRASE') @@ -439,7 +439,7 @@ class KeyfileKey(AESKeyBase): @classmethod def detect(cls, repository, manifest_data): - meta, data = parser(manifest_data) + mac, meta, data = parser(manifest_data) compressor, keyer, maccer, cipher = get_implementations(meta) key = cls(compressor, maccer, cipher) path = cls.find_key_file(repository) @@ -630,17 +630,17 @@ def legacy_parser(all_data, key_type): # all rather hardcoded """ offset = 1 if key_type == PlaintextKey.TYPE: - hmac = None - iv = stored_iv = None + mac = None + stored_iv = None data = all_data[offset:] else: - hmac = all_data[offset:offset+32] + mac = all_data[offset:offset+32] stored_iv = all_data[offset+32:offset+40] data = all_data[offset+40:] meta = Meta(compr_type=6, key_type=key_type, mac_type=HMAC_SHA256.TYPE, cipher_type=AES_CTR_HMAC.TYPE, - hmac=hmac, stored_iv=stored_iv) - return meta, data + stored_iv=stored_iv) + return mac, meta, data def parser00(all_data): return legacy_parser(all_data, KeyfileKey.TYPE) @@ -655,7 +655,7 @@ def parser02(all_data): def parser03(all_data): # new & flexible """ Payload layout: - always: TYPE(1) + MSGPACK((meta, data)) + always: TYPE(1) + MSGPACK((tag, meta, data)) meta is a Meta namedtuple and contains all required information about data. data is maybe compressed (see meta) and maybe encrypted (see meta). 
@@ -672,9 +672,9 @@ def parser03(all_data): # new & flexible max_ext_len=0, # not used yet ) unpacker.feed(all_data[1:]) - meta_tuple, data = unpacker.unpack() + mac, meta_tuple, data = unpacker.unpack() meta = Meta(*meta_tuple) - return meta, data + return mac, meta, data def parser(data): @@ -690,14 +690,14 @@ def parser(data): def key_factory(repository, manifest_data): - meta, data = parser(manifest_data) + mac, meta, data = parser(manifest_data) compressor, keyer, maccer, cipher = get_implementations(meta) return keyer.detect(repository, manifest_data) -def generate(meta, data): +def generate(mac, meta, data): # always create new-style 0x03 format - return b'\x03' + msgpack.packb((meta, data), use_bin_type=True) + return b'\x03' + msgpack.packb((mac, meta, data), use_bin_type=True) def compressor_creator(args): diff --git a/attic/testsuite/archiver.py b/attic/testsuite/archiver.py index 01e0bccd..5d4c2194 100644 --- a/attic/testsuite/archiver.py +++ b/attic/testsuite/archiver.py @@ -383,7 +383,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): hash = sha256(data).digest() if hash not in seen: seen.add(hash) - meta, data = parser(data) + mac, meta, data = parser(data) num_blocks = num_aes_blocks(len(data)) nonce = bytes_to_long(meta.stored_iv) for counter in range(nonce, nonce + num_blocks): diff --git a/attic/testsuite/key.py b/attic/testsuite/key.py index 51914e48..cde0b79f 100644 --- a/attic/testsuite/key.py +++ b/attic/testsuite/key.py @@ -19,19 +19,19 @@ class KeyTestCase(AtticTestCase): keyfile2_key_file = """ ATTIC KEY 0000000000000000000000000000000000000000000000000000000000000000 -hqlhbGdvcml0aG2kZ21hY6d2ZXJzaW9uAaRoYXNo2gAgeXkW700i+1t5mroRI9YQuAAAAA -AAAAAAAAAAAAAAAACkZGF0YdoA0FVh2YsC4Nd5Pd+9wm6m/HbXnfy7ahBQNUp/grFY/LN7 -CPZYHM9tblJ40Kklnn6pktJhgEizgOzK435wbRWeuYiLO4+W0AEX74i0GcFafOhN7DyLYA -jE1qQMTm7tK2LlapnKVOOiH3KV67pdSMtRYDrHbx0Gud3jBtfMGU39nuwEFfWwIzQ8b4Tm -SWlG6orGwmvRJn8a5H+JtOY90e+tM7s2M4VF6p8grtUyighYxJrO4Y78/fsDpSHbYAh+en 
-6GrpcESLKYoDtgqiyjle0LpQ6kc2FsdNoAIKhlgtF1As4InTAsR3bCQif78vGjYYMKerJQ -ge5ZaKvpqml0ZXJhdGlvbnPOAAGGoA==""".strip() +hqppdGVyYXRpb25zzgABhqCkc2FsdNoAIDq9JP02h8kcifnmD32O8kvEVHvgfjz3XgxeTt +wEZNGupGRhdGHaANDXW3xga6hSj1Ix8a41jQKIeX9kZo2Zvyy8XTxX7hbgQKm82649nAfm +hNMTrukDNyrwYN5dUGlS60XUccmfOa+rVJZkQhEiblpC7teFrQvYYUB5in83vDJK8XG8yS +6yHh6uQC5IdTdofTRN41JkQvXyd2wSzvWnqCrVTS8IEN4fmVXbNdJpHHzFxGDtsLRPP1FX +MdB35RjBHsHocJs+uk0syXQwfuVhq/AJQg24GznHpM4rnli8UTe82jM/7BXDAMOUDvTicF +cuzUZa5TlKphowp3ZlcnNpb24BqWFsZ29yaXRobaRnbWFjpGhhc2jaACBkWGoI42Vpa7c7 +yeZwRQ7VAAAAAAAAAAAAAAAAAAAAAA==""".strip() keyfile2_cdata = unhexlify(re.sub('\W', '', """ - 03929600001402c4207f9b12b337e123e322ca2af795788ee100000000000000000000000000000000 - c4080000000000000000c407624711de25ab38 + 0393c420cff16872afba0a609bfa4b458e9ea4e900000000000000000000000000000000 + 9500001402c4080000000000000000c407e04fb0a78f1a39 """)) - keyfile2_id = unhexlify('4d532cec0eb8ec34d65c5491b5158b1400000000000000000000000000000000') + keyfile2_id = unhexlify('7cf9e207968deea8ea54f14ccf814cfe00000000000000000000000000000000') def setUp(self): self.tmppath = tempfile.mkdtemp() From 3149f6a8283079d2a4bbb88c0cd544d169931813 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 22 Mar 2015 21:27:25 +0100 Subject: [PATCH 37/58] cosmetic: s/tag/mac/ mac is a more specific term, tag is too general of course it is only a real MAC if we have keys, otherwise it is a hash. --- attic/crypto.pyx | 22 ++++++++++---------- attic/key.py | 44 +++++++++++++++++++-------------------- attic/testsuite/crypto.py | 18 ++++++++-------- 3 files changed, 42 insertions(+), 42 deletions(-) diff --git a/attic/crypto.pyx b/attic/crypto.pyx index d92806f3..a087e845 100644 --- a/attic/crypto.pyx +++ b/attic/crypto.pyx @@ -10,7 +10,7 @@ API_VERSION = 2 AES_CTR_MODE = 1 AES_GCM_MODE = 2 -TAG_SIZE = 16 # bytes; 128 bits is the maximum allowed value. see "hack" below. +MAC_SIZE = 16 # bytes; 128 bits is the maximum allowed value. see "hack" below. 
IV_SIZE = 16 # bytes; 128 bits cdef extern from "openssl/rand.h": @@ -159,13 +159,13 @@ cdef class AES: if not EVP_DecryptUpdate(&self.ctx, NULL, &outl, aad, aadl): raise Exception('EVP_DecryptUpdate failed') - def compute_tag_and_encrypt(self, data): + def compute_mac_and_encrypt(self, data): cdef int inl = len(data) cdef int ctl = 0 cdef int outl = 0 # note: modes that use padding, need up to one extra AES block (16B) cdef unsigned char *out = malloc(inl+16) - cdef unsigned char *tag = malloc(TAG_SIZE) + cdef unsigned char *mac = malloc(MAC_SIZE) if not out: raise MemoryError try: @@ -176,16 +176,16 @@ cdef class AES: raise Exception('EVP_EncryptFinal failed') ctl += outl if self.mode == AES_GCM_MODE: - # Get tag (only GCM mode. for CTR, the returned tag is undefined) - if not EVP_CIPHER_CTX_ctrl(&self.ctx, EVP_CTRL_GCM_GET_TAG, TAG_SIZE, tag): + # Get tag (mac) - only GCM mode. for CTR, the returned mac is undefined + if not EVP_CIPHER_CTX_ctrl(&self.ctx, EVP_CTRL_GCM_GET_TAG, MAC_SIZE, mac): raise Exception('EVP_CIPHER_CTX_ctrl GET TAG failed') # hack: caller wants 32B tags (256b), so we give back that amount - return (tag[:TAG_SIZE] + b'\x00'*16), out[:ctl] + return (mac[:MAC_SIZE] + b'\x00'*16), out[:ctl] finally: - free(tag) + free(mac) free(out) - def check_tag_and_decrypt(self, tag, data): + def check_mac_and_decrypt(self, mac, data): cdef int inl = len(data) cdef int ptl = 0 cdef int outl = 0 @@ -200,11 +200,11 @@ cdef class AES: raise Exception('EVP_DecryptUpdate failed') ptl = outl if self.mode == AES_GCM_MODE: - # Set expected tag value. - if not EVP_CIPHER_CTX_ctrl(&self.ctx, EVP_CTRL_GCM_SET_TAG, TAG_SIZE, tag): + # Set expected tag (mac) value. 
+ if not EVP_CIPHER_CTX_ctrl(&self.ctx, EVP_CTRL_GCM_SET_TAG, MAC_SIZE, mac): raise Exception('EVP_CIPHER_CTX_ctrl SET TAG failed') if EVP_DecryptFinal_ex(&self.ctx, out+ptl, &outl) <= 0: - # for GCM mode, a failure here means corrupted / tampered tag or data + # for GCM mode, a failure here means corrupted / tampered tag (mac) or data raise Exception('EVP_DecryptFinal failed') ptl += outl return out[:ptl] diff --git a/attic/key.py b/attic/key.py index 5269164f..20b88191 100644 --- a/attic/key.py +++ b/attic/key.py @@ -111,8 +111,8 @@ class GHASH: mac_cipher = AES(mode=AES_GCM_MODE, is_encrypt=True, key=self.key, iv=b'\0' * 16) # GMAC = aes-gcm with all data as AAD, no data as to-be-encrypted data mac_cipher.add(bytes(self.data)) - tag, _ = mac_cipher.compute_tag_and_encrypt(b'') - return tag + hash, _ = mac_cipher.compute_mac_and_encrypt(b'') + return hash class HMAC_SHA256(HMAC): @@ -202,10 +202,10 @@ class PLAIN: def __init__(self, **kw): pass - def compute_tag_and_encrypt(self, data): + def compute_mac_and_encrypt(self, data): return b'', b'', data - def check_tag_and_decrypt(self, tag, iv_last8, data): + def check_mac_and_decrypt(self, mac, iv_last8, data): return data @@ -218,22 +218,22 @@ class AES_CTR_HMAC: self.enc_cipher = AES(mode=AES_CTR_MODE, is_encrypt=True, key=enc_key, iv=enc_iv) self.dec_cipher = AES(mode=AES_CTR_MODE, is_encrypt=False, key=enc_key) - def compute_tag_and_encrypt(self, data): + def compute_mac_and_encrypt(self, data): self.enc_cipher.reset(iv=self.enc_iv) iv_last8 = self.enc_iv[8:] - _, data = self.enc_cipher.compute_tag_and_encrypt(data) + _, data = self.enc_cipher.compute_mac_and_encrypt(data) # increase the IV (counter) value so same value is never used twice current_iv = bytes_to_long(iv_last8) self.enc_iv = PREFIX + long_to_bytes(current_iv + num_aes_blocks(len(data))) - tag = HMAC(self.hmac_key, iv_last8 + data, sha256).digest() # XXX mac / hash flexibility - return tag, iv_last8, data + mac = HMAC(self.hmac_key, iv_last8 + 
data, sha256).digest() # XXX mac / hash flexibility + return mac, iv_last8, data - def check_tag_and_decrypt(self, tag, iv_last8, data): + def check_mac_and_decrypt(self, mac, iv_last8, data): iv = PREFIX + iv_last8 - if HMAC(self.hmac_key, iv_last8 + data, sha256).digest() != tag: + if HMAC(self.hmac_key, iv_last8 + data, sha256).digest() != mac: raise IntegrityError('Encryption envelope checksum mismatch') self.dec_cipher.reset(iv=iv) - data = self.dec_cipher.check_tag_and_decrypt(None, data) + data = self.dec_cipher.check_mac_and_decrypt(None, data) return data @@ -246,22 +246,22 @@ class AES_GCM: self.enc_cipher = AES(mode=AES_GCM_MODE, is_encrypt=True, key=enc_key, iv=enc_iv) self.dec_cipher = AES(mode=AES_GCM_MODE, is_encrypt=False, key=enc_key) - def compute_tag_and_encrypt(self, data): + def compute_mac_and_encrypt(self, data): self.enc_cipher.reset(iv=self.enc_iv) iv_last8 = self.enc_iv[8:] self.enc_cipher.add(iv_last8) - tag, data = self.enc_cipher.compute_tag_and_encrypt(data) + mac, data = self.enc_cipher.compute_mac_and_encrypt(data) # increase the IV (counter) value so same value is never used twice current_iv = bytes_to_long(iv_last8) self.enc_iv = PREFIX + long_to_bytes(current_iv + num_aes_blocks(len(data))) - return tag, iv_last8, data + return mac, iv_last8, data - def check_tag_and_decrypt(self, tag, iv_last8, data): + def check_mac_and_decrypt(self, mac, iv_last8, data): iv = PREFIX + iv_last8 self.dec_cipher.reset(iv=iv) self.dec_cipher.add(iv_last8) try: - data = self.dec_cipher.check_tag_and_decrypt(tag, data) + data = self.dec_cipher.check_mac_and_decrypt(mac, data) except Exception: raise IntegrityError('Encryption envelope checksum mismatch') return data @@ -300,7 +300,7 @@ class KeyBase(object): def encrypt(self, data): data = self.compressor.compress(data) - mac, iv_last8, data = self.cipher.compute_tag_and_encrypt(data) + mac, iv_last8, data = self.cipher.compute_mac_and_encrypt(data) meta = Meta(compr_type=self.compressor.TYPE, 
key_type=self.TYPE, mac_type=self.maccer_cls.TYPE, cipher_type=self.cipher.TYPE, stored_iv=iv_last8) @@ -312,7 +312,7 @@ class KeyBase(object): assert isinstance(self, keyer) assert self.maccer_cls is maccer assert self.cipher_cls is cipher - data = self.cipher.check_tag_and_decrypt(mac, meta.stored_iv, data) + data = self.cipher.check_mac_and_decrypt(mac, meta.stored_iv, data) data = self.compressor.decompress(data) if id and self.id_hash(data) != id: raise IntegrityError('Chunk id verification failed') @@ -486,7 +486,7 @@ class KeyfileKey(AESKeyBase): key = pbkdf2_sha256(passphrase.encode('utf-8'), d[b'salt'], d[b'iterations'], 32) try: cipher = AES(mode=AES_GCM_MODE, is_encrypt=False, key=key, iv=b'\0'*16) - data = cipher.check_tag_and_decrypt(d[b'hash'], d[b'data']) + data = cipher.check_mac_and_decrypt(d[b'hash'], d[b'data']) return data except Exception: return None @@ -496,13 +496,13 @@ class KeyfileKey(AESKeyBase): iterations = 100000 key = pbkdf2_sha256(passphrase.encode('utf-8'), salt, iterations, 32) cipher = AES(mode=AES_GCM_MODE, is_encrypt=True, key=key, iv=b'\0'*16) - tag, cdata = cipher.compute_tag_and_encrypt(data) + mac, cdata = cipher.compute_mac_and_encrypt(data) d = { 'version': 1, 'salt': salt, 'iterations': iterations, 'algorithm': 'gmac', - 'hash': tag, + 'hash': mac, 'data': cdata, } return msgpack.packb(d) @@ -655,7 +655,7 @@ def parser02(all_data): def parser03(all_data): # new & flexible """ Payload layout: - always: TYPE(1) + MSGPACK((tag, meta, data)) + always: TYPE(1) + MSGPACK((mac, meta, data)) meta is a Meta namedtuple and contains all required information about data. data is maybe compressed (see meta) and maybe encrypted (see meta). 
diff --git a/attic/testsuite/crypto.py b/attic/testsuite/crypto.py index 97cc065c..8b523494 100644 --- a/attic/testsuite/crypto.py +++ b/attic/testsuite/crypto.py @@ -34,11 +34,11 @@ class CryptoTestCase(AtticTestCase): data = b'foo' * 10 # encrypt aes = AES(mode=AES_CTR_MODE, is_encrypt=True, key=key, iv=iv) - _, cdata = aes.compute_tag_and_encrypt(data) + _, cdata = aes.compute_mac_and_encrypt(data) self.assert_equal(hexlify(cdata), b'c6efb702de12498f34a2c2bbc8149e759996d08bf6dc5c610aefc0c3a466') - # decrypt (correct tag/cdata) + # decrypt (correct mac/cdata) aes = AES(mode=AES_CTR_MODE, is_encrypt=False, key=key, iv=iv) - pdata = aes.check_tag_and_decrypt(None, cdata) + pdata = aes.check_mac_and_decrypt(None, cdata) self.assert_equal(data, pdata) def test_aes_gcm(self): @@ -47,14 +47,14 @@ class CryptoTestCase(AtticTestCase): data = b'foo' * 10 # encrypt aes = AES(mode=AES_GCM_MODE, is_encrypt=True, key=key, iv=iv) - tag, cdata = aes.compute_tag_and_encrypt(data) - self.assert_equal(hexlify(tag), b'c98aa10eb6b7031bcc2160878d9438fb00000000000000000000000000000000') + mac, cdata = aes.compute_mac_and_encrypt(data) + self.assert_equal(hexlify(mac), b'c98aa10eb6b7031bcc2160878d9438fb00000000000000000000000000000000') self.assert_equal(hexlify(cdata), b'841bcce405df769d22ee9f7f012edf5dc7fb2594d924c7400ffd050f2741') - # decrypt (correct tag/cdata) + # decrypt (correct mac/cdata) aes = AES(mode=AES_GCM_MODE, is_encrypt=False, key=key, iv=iv) - pdata = aes.check_tag_and_decrypt(tag, cdata) + pdata = aes.check_mac_and_decrypt(mac, cdata) self.assert_equal(data, pdata) - # decrypt (incorrect tag/cdata) + # decrypt (incorrect mac/cdata) aes = AES(mode=AES_GCM_MODE, is_encrypt=False, key=key, iv=iv) cdata = b'x' + cdata[1:] # corrupt cdata - self.assertRaises(Exception, aes.check_tag_and_decrypt, tag, cdata) + self.assertRaises(Exception, aes.check_mac_and_decrypt, mac, cdata) From 012d6448d2a8005d3a636f6115b3effc3fd378f4 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: 
Mon, 23 Mar 2015 00:05:57 +0100 Subject: [PATCH 38/58] compute mac over all meta, make IV storage flexible, still support legacy legacy: - last 8 bytes of IV are stored, upper 8 are assumed to be zero - mac is computed over iv_last8 + data new: - store complete IV - mac is computed over complete meta + data refactored some code into separate increment_iv and get_aad functions --- attic/key.py | 106 ++++++++++++++++++++++-------------- attic/testsuite/archiver.py | 2 +- attic/testsuite/key.py | 49 +++++++++-------- 3 files changed, 91 insertions(+), 66 deletions(-) diff --git a/attic/key.py b/attic/key.py index 20b88191..34012469 100644 --- a/attic/key.py +++ b/attic/key.py @@ -24,7 +24,7 @@ from attic.helpers import IntegrityError, get_keys_dir, Error # zero anyway as the full IV is a 128bit counter. PREFIX are the upper 8 bytes, # stored_iv are the lower 8 Bytes. PREFIX = b'\0' * 8 -Meta = namedtuple('Meta', 'compr_type, key_type, mac_type, cipher_type, stored_iv') +Meta = namedtuple('Meta', 'compr_type, key_type, mac_type, cipher_type, iv, legacy') class UnsupportedPayloadError(Error): @@ -198,17 +198,46 @@ COMPR_DEFAULT = NullCompressor.TYPE # no compression class PLAIN: TYPE = 0 + enc_iv = None # dummy def __init__(self, **kw): pass - def compute_mac_and_encrypt(self, data): - return b'', b'', data + def compute_mac_and_encrypt(self, meta, data): + return None, data - def check_mac_and_decrypt(self, mac, iv_last8, data): + def check_mac_and_decrypt(self, mac, meta, data): return data +def increment_iv(iv, amount): + """ + increment the given IV considering that bytes of data was + encrypted based on it. In CTR / GCM mode, the IV is just a counter and + must never repeat. 
+ + :param iv: current IV, 16 bytes (128 bit) + :param amount: amount of data (in bytes) that was encrypted + :return: new IV, 16 bytes (128 bit) + """ + # TODO: code assumes that the last 8 bytes are enough, the upper 8 always zero + iv_last8 = iv[8:] + current_iv = bytes_to_long(iv_last8) + new_iv = current_iv + num_aes_blocks(amount) + iv_last8 = long_to_bytes(new_iv) + iv = PREFIX + iv_last8 + return iv + + +def get_aad(meta): + """get additional authenticated data for AEAD ciphers""" + if meta.legacy: + # legacy format computed the mac over (iv_last8 + data) + return meta.iv[8:] + else: + return msgpack.packb(meta) + + class AES_CTR_HMAC: TYPE = 1 @@ -218,21 +247,19 @@ class AES_CTR_HMAC: self.enc_cipher = AES(mode=AES_CTR_MODE, is_encrypt=True, key=enc_key, iv=enc_iv) self.dec_cipher = AES(mode=AES_CTR_MODE, is_encrypt=False, key=enc_key) - def compute_mac_and_encrypt(self, data): - self.enc_cipher.reset(iv=self.enc_iv) - iv_last8 = self.enc_iv[8:] + def compute_mac_and_encrypt(self, meta, data): + self.enc_cipher.reset(iv=meta.iv) _, data = self.enc_cipher.compute_mac_and_encrypt(data) - # increase the IV (counter) value so same value is never used twice - current_iv = bytes_to_long(iv_last8) - self.enc_iv = PREFIX + long_to_bytes(current_iv + num_aes_blocks(len(data))) - mac = HMAC(self.hmac_key, iv_last8 + data, sha256).digest() # XXX mac / hash flexibility - return mac, iv_last8, data + self.enc_iv = increment_iv(meta.iv, len(data)) + aad = get_aad(meta) + mac = HMAC(self.hmac_key, aad + data, sha256).digest() # XXX mac / hash flexibility + return mac, data - def check_mac_and_decrypt(self, mac, iv_last8, data): - iv = PREFIX + iv_last8 - if HMAC(self.hmac_key, iv_last8 + data, sha256).digest() != mac: + def check_mac_and_decrypt(self, mac, meta, data): + aad = get_aad(meta) + if HMAC(self.hmac_key, aad + data, sha256).digest() != mac: raise IntegrityError('Encryption envelope checksum mismatch') - self.dec_cipher.reset(iv=iv) + 
self.dec_cipher.reset(iv=meta.iv) data = self.dec_cipher.check_mac_and_decrypt(None, data) return data @@ -246,20 +273,18 @@ class AES_GCM: self.enc_cipher = AES(mode=AES_GCM_MODE, is_encrypt=True, key=enc_key, iv=enc_iv) self.dec_cipher = AES(mode=AES_GCM_MODE, is_encrypt=False, key=enc_key) - def compute_mac_and_encrypt(self, data): - self.enc_cipher.reset(iv=self.enc_iv) - iv_last8 = self.enc_iv[8:] - self.enc_cipher.add(iv_last8) + def compute_mac_and_encrypt(self, meta, data): + self.enc_cipher.reset(iv=meta.iv) + aad = get_aad(meta) + self.enc_cipher.add(aad) mac, data = self.enc_cipher.compute_mac_and_encrypt(data) - # increase the IV (counter) value so same value is never used twice - current_iv = bytes_to_long(iv_last8) - self.enc_iv = PREFIX + long_to_bytes(current_iv + num_aes_blocks(len(data))) - return mac, iv_last8, data + self.enc_iv = increment_iv(meta.iv, len(data)) + return mac, data - def check_mac_and_decrypt(self, mac, iv_last8, data): - iv = PREFIX + iv_last8 - self.dec_cipher.reset(iv=iv) - self.dec_cipher.add(iv_last8) + def check_mac_and_decrypt(self, mac, meta, data): + self.dec_cipher.reset(iv=meta.iv) + aad = get_aad(meta) + self.dec_cipher.add(aad) try: data = self.dec_cipher.check_mac_and_decrypt(mac, data) except Exception: @@ -300,10 +325,10 @@ class KeyBase(object): def encrypt(self, data): data = self.compressor.compress(data) - mac, iv_last8, data = self.cipher.compute_mac_and_encrypt(data) meta = Meta(compr_type=self.compressor.TYPE, key_type=self.TYPE, mac_type=self.maccer_cls.TYPE, cipher_type=self.cipher.TYPE, - stored_iv=iv_last8) + iv=self.cipher.enc_iv, legacy=False) + mac, data = self.cipher.compute_mac_and_encrypt(meta, data) return generate(mac, meta, data) def decrypt(self, id, data): @@ -312,7 +337,7 @@ class KeyBase(object): assert isinstance(self, keyer) assert self.maccer_cls is maccer assert self.cipher_cls is cipher - data = self.cipher.check_mac_and_decrypt(mac, meta.stored_iv, data) + data = 
self.cipher.check_mac_and_decrypt(mac, meta, data) data = self.compressor.decompress(data) if id and self.id_hash(data) != id: raise IntegrityError('Chunk id verification failed') @@ -352,10 +377,9 @@ class AESKeyBase(KeyBase): affect security but limits the maximum repository capacity to only 295 exabytes! """ - def extract_nonce(self, payload): - mac, meta, data = parser(payload) - nonce = bytes_to_long(meta.stored_iv) - return nonce + def extract_iv(self, payload): + _, meta, _ = parser(payload) + return meta.iv def init_from_random_data(self, data): self.enc_key = data[0:32] @@ -416,8 +440,7 @@ class PassphraseKey(AESKeyBase): key.init(repository, passphrase) try: key.decrypt(None, manifest_data) - num_blocks = num_aes_blocks(len(data)) - key.init_ciphers(PREFIX + long_to_bytes(key.extract_nonce(manifest_data) + num_blocks)) + key.init_ciphers(increment_iv(key.extract_iv(manifest_data), len(data))) return key except IntegrityError: passphrase = getpass(prompt) @@ -447,8 +470,7 @@ class KeyfileKey(AESKeyBase): passphrase = os.environ.get('ATTIC_PASSPHRASE', '') while not key.load(path, passphrase): passphrase = getpass(prompt) - num_blocks = num_aes_blocks(len(data)) - key.init_ciphers(PREFIX + long_to_bytes(key.extract_nonce(manifest_data) + num_blocks)) + key.init_ciphers(increment_iv(key.extract_iv(manifest_data), len(data))) return key @classmethod @@ -631,15 +653,15 @@ def legacy_parser(all_data, key_type): # all rather hardcoded offset = 1 if key_type == PlaintextKey.TYPE: mac = None - stored_iv = None + iv = None data = all_data[offset:] else: mac = all_data[offset:offset+32] - stored_iv = all_data[offset+32:offset+40] + iv = PREFIX + all_data[offset+32:offset+40] data = all_data[offset+40:] meta = Meta(compr_type=6, key_type=key_type, mac_type=HMAC_SHA256.TYPE, cipher_type=AES_CTR_HMAC.TYPE, - stored_iv=stored_iv) + iv=iv, legacy=True) return mac, meta, data def parser00(all_data): diff --git a/attic/testsuite/archiver.py b/attic/testsuite/archiver.py 
index 5d4c2194..39ae2151 100644 --- a/attic/testsuite/archiver.py +++ b/attic/testsuite/archiver.py @@ -385,7 +385,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): seen.add(hash) mac, meta, data = parser(data) num_blocks = num_aes_blocks(len(data)) - nonce = bytes_to_long(meta.stored_iv) + nonce = bytes_to_long(meta.iv, 8) for counter in range(nonce, nonce + num_blocks): self.assert_not_in(counter, used) used.add(counter) diff --git a/attic/testsuite/key.py b/attic/testsuite/key.py index cde0b79f..11c87100 100644 --- a/attic/testsuite/key.py +++ b/attic/testsuite/key.py @@ -5,7 +5,7 @@ import tempfile from binascii import hexlify from attic.crypto import bytes_to_long from attic.testsuite import AtticTestCase -from attic.key import PlaintextKey, PassphraseKey, KeyfileKey, COMPR_DEFAULT +from attic.key import PlaintextKey, PassphraseKey, KeyfileKey, COMPR_DEFAULT, increment_iv from attic.helpers import Location, unhexlify @@ -19,19 +19,20 @@ class KeyTestCase(AtticTestCase): keyfile2_key_file = """ ATTIC KEY 0000000000000000000000000000000000000000000000000000000000000000 -hqppdGVyYXRpb25zzgABhqCkc2FsdNoAIDq9JP02h8kcifnmD32O8kvEVHvgfjz3XgxeTt -wEZNGupGRhdGHaANDXW3xga6hSj1Ix8a41jQKIeX9kZo2Zvyy8XTxX7hbgQKm82649nAfm -hNMTrukDNyrwYN5dUGlS60XUccmfOa+rVJZkQhEiblpC7teFrQvYYUB5in83vDJK8XG8yS -6yHh6uQC5IdTdofTRN41JkQvXyd2wSzvWnqCrVTS8IEN4fmVXbNdJpHHzFxGDtsLRPP1FX -MdB35RjBHsHocJs+uk0syXQwfuVhq/AJQg24GznHpM4rnli8UTe82jM/7BXDAMOUDvTicF -cuzUZa5TlKphowp3ZlcnNpb24BqWFsZ29yaXRobaRnbWFjpGhhc2jaACBkWGoI42Vpa7c7 -yeZwRQ7VAAAAAAAAAAAAAAAAAAAAAA==""".strip() +hqlhbGdvcml0aG2kZ21hY6RoYXNo2gAgY7jwSMnBwpqD3Fk/aAdSAgAAAAAAAAAAAAAAAA +AAAACqaXRlcmF0aW9uc84AAYagp3ZlcnNpb24BpHNhbHTaACASqCq8G6a/K/W+bOrNDW65 +Sfl9ZHrTEtq6l+AMUmATxKRkYXRh2gDQuDVCijDzeZDD/JLPrOtsQL/vrZEWvCt5RuXFOt +tTZfbCJDmv2nt4KvYToVsp82pffZDcsLaOOBCTGurpkdefsdiLMgGiLlbrsXlES+fbKZfq +Tx2x2DjU4L1bFxuoypDIdk2lB3S98ZpFZ6yd1XtDBVTQ34FZTlDXIZ5HyuxAJBrGKYj/Un +Fk24N5xSoPfeQhE3r7hqEsGwEEX0s6sg0LHMGyc4xSBb13iZxWRlSdnvBC7teIeevhT/DU 
+scOrlrX0NO2eqe5jQF+zj1Q6OtBvRA== +""".strip() keyfile2_cdata = unhexlify(re.sub('\W', '', """ - 0393c420cff16872afba0a609bfa4b458e9ea4e900000000000000000000000000000000 - 9500001402c4080000000000000000c407e04fb0a78f1a39 + 0393c420fd6e9ac6f8c49c4789d1c924c14c309200000000000000000000000000000000 + 9600001402c41000000000000000000000000000000000c2c4071352fe2286e3ed """)) - keyfile2_id = unhexlify('7cf9e207968deea8ea54f14ccf814cfe00000000000000000000000000000000') + keyfile2_id = unhexlify('d4954bcf8d7b1762356e91b2611c727800000000000000000000000000000000') def setUp(self): self.tmppath = tempfile.mkdtemp() @@ -65,17 +66,18 @@ yeZwRQ7VAAAAAAAAAAAAAAAAAAAAAA==""".strip() def test_keyfile(self): os.environ['ATTIC_PASSPHRASE'] = 'test' key = KeyfileKey.create(self.MockRepository(), self.MockArgs()) - self.assert_equal(bytes_to_long(key.enc_iv, 8), 0) + self.assert_equal(key.enc_iv, b'\0'*16) manifest = key.encrypt(b'XXX') - self.assert_equal(key.extract_nonce(manifest), 0) + self.assert_equal(key.extract_iv(manifest), b'\0'*16) manifest2 = key.encrypt(b'XXX') self.assert_not_equal(manifest, manifest2) self.assert_equal(key.decrypt(None, manifest), key.decrypt(None, manifest2)) - self.assert_equal(key.extract_nonce(manifest2), 1) - iv = key.extract_nonce(manifest) + self.assert_equal(key.extract_iv(manifest2), b'\0'*15+b'\x01') + iv = key.extract_iv(manifest) key2 = KeyfileKey.detect(self.MockRepository(), manifest) - # we just assume that the payload fits into 1 AES block (which is given for b'XXX'). - self.assert_equal(bytes_to_long(key2.enc_iv, 8), iv + 1) + # we assume that the payload fits into one 16B AES block (which is given for b'XXX'). 
+ iv_plus_1 = increment_iv(iv, 16) + self.assert_equal(key2.enc_iv, iv_plus_1) # Key data sanity check self.assert_equal(len(set([key2.id_key, key2.enc_key, key2.enc_hmac_key])), 3) self.assert_equal(key2.chunk_seed == 0, False) @@ -92,21 +94,22 @@ yeZwRQ7VAAAAAAAAAAAAAAAAAAAAAA==""".strip() def test_passphrase(self): os.environ['ATTIC_PASSPHRASE'] = 'test' key = PassphraseKey.create(self.MockRepository(), self.MockArgs()) - self.assert_equal(bytes_to_long(key.enc_iv, 8), 0) + self.assert_equal(key.enc_iv, b'\0'*16) self.assert_equal(hexlify(key.id_key), b'793b0717f9d8fb01c751a487e9b827897ceea62409870600013fbc6b4d8d7ca6') self.assert_equal(hexlify(key.enc_hmac_key), b'b885a05d329a086627412a6142aaeb9f6c54ab7950f996dd65587251f6bc0901') self.assert_equal(hexlify(key.enc_key), b'2ff3654c6daf7381dbbe718d2b20b4f1ea1e34caa6cc65f6bb3ac376b93fed2a') self.assert_equal(key.chunk_seed, -775740477) manifest = key.encrypt(b'XXX') - self.assert_equal(key.extract_nonce(manifest), 0) + self.assert_equal(key.extract_iv(manifest), b'\0'*16) manifest2 = key.encrypt(b'XXX') self.assert_not_equal(manifest, manifest2) self.assert_equal(key.decrypt(None, manifest), key.decrypt(None, manifest2)) - self.assert_equal(key.extract_nonce(manifest2), 1) - iv = key.extract_nonce(manifest) + self.assert_equal(key.extract_iv(manifest2), b'\0'*15+b'\x01') + iv = key.extract_iv(manifest) key2 = PassphraseKey.detect(self.MockRepository(), manifest) - # we just assume that the payload fits into 1 AES block (which is given for b'XXX'). - self.assert_equal(bytes_to_long(key2.enc_iv, 8), iv + 1) + # we assume that the payload fits into one 16B AES block (which is given for b'XXX'). 
+ iv_plus_1 = increment_iv(iv, 16) + self.assert_equal(key2.enc_iv, iv_plus_1) self.assert_equal(key.id_key, key2.id_key) self.assert_equal(key.enc_hmac_key, key2.enc_hmac_key) self.assert_equal(key.enc_key, key2.enc_key) From c759eeee1eff4b345e538d9a6c5272fbf4029109 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 23 Mar 2015 00:11:52 +0100 Subject: [PATCH 39/58] fix backwards compatibility for unencrypted repos --- attic/key.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/attic/key.py b/attic/key.py index 34012469..591f4e9e 100644 --- a/attic/key.py +++ b/attic/key.py @@ -652,16 +652,19 @@ def legacy_parser(all_data, key_type): # all rather hardcoded """ offset = 1 if key_type == PlaintextKey.TYPE: + mac_type = SHA256.TYPE mac = None + cipher_type = PLAIN.TYPE iv = None data = all_data[offset:] else: + mac_type = HMAC_SHA256.TYPE mac = all_data[offset:offset+32] + cipher_type = AES_CTR_HMAC.TYPE iv = PREFIX + all_data[offset+32:offset+40] data = all_data[offset+40:] - meta = Meta(compr_type=6, key_type=key_type, - mac_type=HMAC_SHA256.TYPE, cipher_type=AES_CTR_HMAC.TYPE, - iv=iv, legacy=True) + meta = Meta(compr_type=6, key_type=key_type, mac_type=mac_type, + cipher_type=cipher_type, iv=iv, legacy=True) return mac, meta, data def parser00(all_data): From 7e1aa163a3c7f9e4e312af34847359de2b99657f Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 23 Mar 2015 00:36:49 +0100 Subject: [PATCH 40/58] aes-gcm: only return the real mac (which is only 128b, 16B) code using id_hash output still expects 256b (32B), thus added a workaround for GHASH/GMAC. --- attic/crypto.pyx | 3 +-- attic/key.py | 2 +- attic/testsuite/crypto.py | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/attic/crypto.pyx b/attic/crypto.pyx index a087e845..4e8e259a 100644 --- a/attic/crypto.pyx +++ b/attic/crypto.pyx @@ -179,8 +179,7 @@ cdef class AES: # Get tag (mac) - only GCM mode. 
for CTR, the returned mac is undefined if not EVP_CIPHER_CTX_ctrl(&self.ctx, EVP_CTRL_GCM_GET_TAG, MAC_SIZE, mac): raise Exception('EVP_CIPHER_CTX_ctrl GET TAG failed') - # hack: caller wants 32B tags (256b), so we give back that amount - return (mac[:MAC_SIZE] + b'\x00'*16), out[:ctl] + return (mac[:MAC_SIZE]), out[:ctl] finally: free(mac) free(out) diff --git a/attic/key.py b/attic/key.py index 591f4e9e..723b321c 100644 --- a/attic/key.py +++ b/attic/key.py @@ -112,7 +112,7 @@ class GHASH: # GMAC = aes-gcm with all data as AAD, no data as to-be-encrypted data mac_cipher.add(bytes(self.data)) hash, _ = mac_cipher.compute_mac_and_encrypt(b'') - return hash + return hash + b'\0'*16 # XXX hashindex code wants 32 bytes (256 bit) class HMAC_SHA256(HMAC): diff --git a/attic/testsuite/crypto.py b/attic/testsuite/crypto.py index 8b523494..bf3fe912 100644 --- a/attic/testsuite/crypto.py +++ b/attic/testsuite/crypto.py @@ -48,7 +48,7 @@ class CryptoTestCase(AtticTestCase): # encrypt aes = AES(mode=AES_GCM_MODE, is_encrypt=True, key=key, iv=iv) mac, cdata = aes.compute_mac_and_encrypt(data) - self.assert_equal(hexlify(mac), b'c98aa10eb6b7031bcc2160878d9438fb00000000000000000000000000000000') + self.assert_equal(hexlify(mac), b'c98aa10eb6b7031bcc2160878d9438fb') self.assert_equal(hexlify(cdata), b'841bcce405df769d22ee9f7f012edf5dc7fb2594d924c7400ffd050f2741') # decrypt (correct mac/cdata) aes = AES(mode=AES_GCM_MODE, is_encrypt=False, key=key, iv=iv) From 6dcf51b6e8e284b5f7d6cc388ce3f3295a2984cd Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 23 Mar 2015 01:42:13 +0100 Subject: [PATCH 41/58] hashindex: make key_size not hardcoded --- attic/_hashindex.c | 2 +- attic/hashindex.pyx | 43 ++++++++++++++++++++++++------------------- 2 files changed, 25 insertions(+), 20 deletions(-) diff --git a/attic/_hashindex.c b/attic/_hashindex.c index a1f70699..0f89d580 100644 --- a/attic/_hashindex.c +++ b/attic/_hashindex.c @@ -355,7 +355,7 @@ hashindex_summarize(HashIndex *index, 
long long *total_size, long long *total_cs void *key = NULL; while((key = hashindex_next_key(index, key))) { - values = key + 32; + values = key + index->key_size; unique_size += values[1]; unique_csize += values[2]; size += values[0] * values[1]; diff --git a/attic/hashindex.pyx b/attic/hashindex.pyx index 13f9da93..1765151d 100644 --- a/attic/hashindex.pyx +++ b/attic/hashindex.pyx @@ -26,9 +26,10 @@ _NoDefault = object() cdef class IndexBase: cdef HashIndex *index - key_size = 32 + cdef int key_size - def __cinit__(self, capacity=0, path=None): + def __cinit__(self, capacity=0, path=None, key_size=32): + self.key_size = key_size if path: self.index = hashindex_read(os.fsencode(path)) if not self.index: @@ -61,7 +62,7 @@ cdef class IndexBase: self[key] = value def __delitem__(self, key): - assert len(key) == 32 + assert len(key) == self.key_size if not hashindex_delete(self.index, key): raise Exception('hashindex_delete failed') @@ -90,14 +91,14 @@ cdef class NSIndex(IndexBase): value_size = 8 def __getitem__(self, key): - assert len(key) == 32 + assert len(key) == self.key_size data = hashindex_get(self.index, key) if not data: raise KeyError return _le32toh(data[0]), _le32toh(data[1]) def __setitem__(self, key, value): - assert len(key) == 32 + assert len(key) == self.key_size cdef int[2] data data[0] = _htole32(value[0]) data[1] = _htole32(value[1]) @@ -105,20 +106,20 @@ cdef class NSIndex(IndexBase): raise Exception('hashindex_set failed') def __contains__(self, key): - assert len(key) == 32 + assert len(key) == self.key_size data = hashindex_get(self.index, key) return data != NULL def iteritems(self, marker=None): cdef const void *key - iter = NSKeyIterator() + iter = NSKeyIterator(self.key_size) iter.idx = self iter.index = self.index if marker: key = hashindex_get(self.index, marker) if marker is None: raise IndexError - iter.key = key - 32 + iter.key = key - self.key_size return iter @@ -126,9 +127,11 @@ cdef class NSKeyIterator: cdef NSIndex idx cdef 
HashIndex *index cdef const void *key + cdef int key_size - def __cinit__(self): + def __cinit__(self, key_size): self.key = NULL + self.key_size = key_size def __iter__(self): return self @@ -137,8 +140,8 @@ cdef class NSKeyIterator: self.key = hashindex_next_key(self.index, self.key) if not self.key: raise StopIteration - cdef int *value = (self.key + 32) - return (self.key)[:32], (_le32toh(value[0]), _le32toh(value[1])) + cdef int *value = (self.key + self.key_size) + return (self.key)[:self.key_size], (_le32toh(value[0]), _le32toh(value[1])) cdef class ChunkIndex(IndexBase): @@ -146,14 +149,14 @@ cdef class ChunkIndex(IndexBase): value_size = 12 def __getitem__(self, key): - assert len(key) == 32 + assert len(key) == self.key_size data = hashindex_get(self.index, key) if not data: raise KeyError return _le32toh(data[0]), _le32toh(data[1]), _le32toh(data[2]) def __setitem__(self, key, value): - assert len(key) == 32 + assert len(key) == self.key_size cdef int[3] data data[0] = _htole32(value[0]) data[1] = _htole32(value[1]) @@ -162,20 +165,20 @@ cdef class ChunkIndex(IndexBase): raise Exception('hashindex_set failed') def __contains__(self, key): - assert len(key) == 32 + assert len(key) == self.key_size data = hashindex_get(self.index, key) return data != NULL def iteritems(self, marker=None): cdef const void *key - iter = ChunkKeyIterator() + iter = ChunkKeyIterator(self.key_size) iter.idx = self iter.index = self.index if marker: key = hashindex_get(self.index, marker) if marker is None: raise IndexError - iter.key = key - 32 + iter.key = key - self.key_size return iter def summarize(self): @@ -188,9 +191,11 @@ cdef class ChunkKeyIterator: cdef ChunkIndex idx cdef HashIndex *index cdef const void *key + cdef int key_size - def __cinit__(self): + def __cinit__(self, key_size): self.key = NULL + self.key_size = key_size def __iter__(self): return self @@ -199,5 +204,5 @@ cdef class ChunkKeyIterator: self.key = hashindex_next_key(self.index, self.key) if not 
self.key: raise StopIteration - cdef int *value = (self.key + 32) - return (self.key)[:32], (_le32toh(value[0]), _le32toh(value[1]), _le32toh(value[2])) + cdef int *value = (self.key + self.key_size) + return (self.key)[:self.key_size], (_le32toh(value[0]), _le32toh(value[1]), _le32toh(value[2])) From 9951e871bcc2caeb95a6219b7d02a6f3b717e848 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 23 Mar 2015 03:25:36 +0100 Subject: [PATCH 42/58] key_creator: return key class, not instance (remove repository arg) --- attic/archiver.py | 3 ++- attic/key.py | 8 ++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/attic/archiver.py b/attic/archiver.py index 1b127f30..a8d8ff9b 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -57,7 +57,8 @@ class Archiver: """Initialize an empty repository""" print('Initializing repository at "%s"' % args.repository.orig) repository = self.open_repository(args.repository, create=True, exclusive=True) - key = key_creator(repository, args) + key_cls = key_creator(args) + key = key_cls.create(repository, args) manifest = Manifest(key, repository) manifest.write() repository.commit() diff --git a/attic/key.py b/attic/key.py index 723b321c..96543221 100644 --- a/attic/key.py +++ b/attic/key.py @@ -734,13 +734,13 @@ def compressor_creator(args): return compressor -def key_creator(repository, args): +def key_creator(args): if args.encryption == 'keyfile': - return KeyfileKey.create(repository, args) + return KeyfileKey if args.encryption == 'passphrase': - return PassphraseKey.create(repository, args) + return PassphraseKey if args.encryption == 'none': - return PlaintextKey.create(repository, args) + return PlaintextKey raise NotImplemented("no encryption %s" % args.encryption) From 0c183acb551322cc0a9d831ff3adba338977f852 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 23 Mar 2015 11:40:13 +0100 Subject: [PATCH 43/58] flexible repository key_size (key like in indexing key, id_hash()) --- attic/archive.py 
| 6 +++--- attic/archiver.py | 12 +++++++----- attic/cache.py | 5 +++-- attic/hashindex.pyx | 7 ++++--- attic/helpers.py | 10 ++++++---- attic/key.py | 8 +++++++- attic/remote.py | 12 ++++++------ attic/repository.py | 28 +++++++++++++++++----------- attic/testsuite/archiver.py | 4 ++-- attic/testsuite/hashindex.py | 10 +++++----- attic/testsuite/key.py | 24 ++++++++++++------------ attic/testsuite/repository.py | 20 ++++++++++++-------- 12 files changed, 84 insertions(+), 62 deletions(-) diff --git a/attic/archive.py b/attic/archive.py index 7f05fd8b..11976f6f 100644 --- a/attic/archive.py +++ b/attic/archive.py @@ -577,7 +577,7 @@ class ArchiveChecker: self.repository = repository self.init_chunks() self.key = self.identify_key(repository) - if Manifest.MANIFEST_ID not in self.chunks: + if Manifest.manifest_id(repository) not in self.chunks: self.manifest = self.rebuild_manifest() else: self.manifest, _ = Manifest.load(repository, key=self.key) @@ -596,7 +596,7 @@ class ArchiveChecker: # Explicity set the initial hash table capacity to avoid performance issues # due to hash table "resonance" capacity = int(len(self.repository) * 1.2) - self.chunks = ChunkIndex(capacity) + self.chunks = ChunkIndex(capacity, key_size=self.repository.key_size) marker = None while True: result = self.repository.list(limit=10000, marker=marker) @@ -648,7 +648,7 @@ class ArchiveChecker: Missing and/or incorrect data is repaired when detected """ # Exclude the manifest from chunks - del self.chunks[Manifest.MANIFEST_ID] + del self.chunks[Manifest.manifest_id(self.repository)] def mark_as_possibly_superseded(id_): if self.chunks.get(id_, (0,))[0] == 0: diff --git a/attic/archiver.py b/attic/archiver.py index a8d8ff9b..acb16492 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -13,7 +13,7 @@ from attic import __version__ from attic.archive import Archive, ArchiveChecker from attic.repository import Repository from attic.cache import Cache -from attic.key import key_creator, 
COMPR_DEFAULT, HASH_DEFAULT, MAC_DEFAULT, PLAIN_DEFAULT, CIPHER_DEFAULT +from attic.key import key_creator, maccer_creator, COMPR_DEFAULT, HASH_DEFAULT, MAC_DEFAULT, PLAIN_DEFAULT, CIPHER_DEFAULT from attic.helpers import Error, location_validator, format_time, \ format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, \ get_cache_dir, get_keys_dir, format_timedelta, prune_within, prune_split, \ @@ -27,11 +27,11 @@ class Archiver: def __init__(self): self.exit_code = 0 - def open_repository(self, location, create=False, exclusive=False): + def open_repository(self, location, create=False, exclusive=False, key_size=None): if location.proto == 'ssh': - repository = RemoteRepository(location, create=create) + repository = RemoteRepository(location, create=create, key_size=key_size) else: - repository = Repository(location.path, create=create, exclusive=exclusive) + repository = Repository(location.path, create=create, exclusive=exclusive, key_size=key_size) repository._location = location return repository @@ -56,8 +56,10 @@ class Archiver: def do_init(self, args): """Initialize an empty repository""" print('Initializing repository at "%s"' % args.repository.orig) - repository = self.open_repository(args.repository, create=True, exclusive=True) key_cls = key_creator(args) + maccer_cls = maccer_creator(args, key_cls) + repository = self.open_repository(args.repository, create=True, exclusive=True, + key_size=maccer_cls.digest_size) key = key_cls.create(repository, args) manifest = Manifest(key, repository) manifest.write() diff --git a/attic/cache.py b/attic/cache.py index 8f35f00c..064f5a58 100644 --- a/attic/cache.py +++ b/attic/cache.py @@ -51,7 +51,7 @@ class Cache: config.set('cache', 'manifest', '') with open(os.path.join(self.path, 'config'), 'w') as fd: config.write(fd) - ChunkIndex().write(os.path.join(self.path, 'chunks').encode('utf-8')) + ChunkIndex(key_size=self.repository.key_size).write(os.path.join(self.path, 
'chunks').encode('utf-8')) with open(os.path.join(self.path, 'files'), 'w') as fd: pass # empty file @@ -67,7 +67,8 @@ class Cache: self.id = self.config.get('cache', 'repository') self.manifest_id = unhexlify(self.config.get('cache', 'manifest')) self.timestamp = self.config.get('cache', 'timestamp', fallback=None) - self.chunks = ChunkIndex.read(os.path.join(self.path, 'chunks').encode('utf-8')) + self.chunks = ChunkIndex.read(os.path.join(self.path, 'chunks').encode('utf-8'), + key_size=self.repository.key_size) self.files = None def close(self): diff --git a/attic/hashindex.pyx b/attic/hashindex.pyx index 1765151d..7a65cbce 100644 --- a/attic/hashindex.pyx +++ b/attic/hashindex.pyx @@ -28,7 +28,8 @@ cdef class IndexBase: cdef HashIndex *index cdef int key_size - def __cinit__(self, capacity=0, path=None, key_size=32): + def __cinit__(self, capacity=0, path=None, key_size=None): + assert key_size is not None self.key_size = key_size if path: self.index = hashindex_read(os.fsencode(path)) @@ -44,8 +45,8 @@ cdef class IndexBase: hashindex_free(self.index) @classmethod - def read(cls, path): - return cls(path=path) + def read(cls, path, key_size=None): + return cls(path=path, key_size=key_size) def write(self, path): if not hashindex_write(self.index, os.fsencode(path)): diff --git a/attic/helpers.py b/attic/helpers.py index 5e7965dc..57779ebe 100644 --- a/attic/helpers.py +++ b/attic/helpers.py @@ -81,18 +81,20 @@ def check_extension_modules(): class Manifest: - MANIFEST_ID = b'\0' * 32 - def __init__(self, key, repository): self.archives = {} self.config = {} self.key = key self.repository = repository + @classmethod + def manifest_id(cls, repository): + return b'\0' * repository.key_size + @classmethod def load(cls, repository, key=None): from .key import key_factory - cdata = repository.get(cls.MANIFEST_ID) + cdata = repository.get(cls.manifest_id(repository)) if not key: key = key_factory(repository, cdata) manifest = cls(key, repository) @@ -117,7 +119,7 @@ 
class Manifest: 'config': self.config, })) self.id = self.key.id_hash(data) - self.repository.put(self.MANIFEST_ID, self.key.encrypt(data)) + self.repository.put(self.manifest_id(self.repository), self.key.encrypt(data)) def prune_within(archives, within): diff --git a/attic/key.py b/attic/key.py index 96543221..5617c105 100644 --- a/attic/key.py +++ b/attic/key.py @@ -69,6 +69,7 @@ class HMAC(hmac.HMAC): class SHA256(object): # note: can't subclass sha256 TYPE = 0 + digest_size = 32 def __init__(self, key, data=b''): # signature is like for a MAC, we ignore the key as this is a simple hash @@ -89,6 +90,7 @@ class SHA256(object): # note: can't subclass sha256 class SHA512_256(sha512_256): """sha512, but digest truncated to 256bit - faster than sha256 on 64bit platforms""" TYPE = 1 + digest_size = 32 def __init__(self, key, data): # signature is like for a MAC, we ignore the key as this is a simple hash @@ -99,6 +101,7 @@ class SHA512_256(sha512_256): class GHASH: TYPE = 2 + digest_size = 16 def __init__(self, key, data): # signature is like for a MAC, we ignore the key as this is a simple hash @@ -112,11 +115,12 @@ class GHASH: # GMAC = aes-gcm with all data as AAD, no data as to-be-encrypted data mac_cipher.add(bytes(self.data)) hash, _ = mac_cipher.compute_mac_and_encrypt(b'') - return hash + b'\0'*16 # XXX hashindex code wants 32 bytes (256 bit) + return hash class HMAC_SHA256(HMAC): TYPE = 10 + digest_size = 32 def __init__(self, key, data): if key is None: @@ -126,6 +130,7 @@ class HMAC_SHA256(HMAC): class HMAC_SHA512_256(HMAC): TYPE = 11 + digest_size = 32 def __init__(self, key, data): if key is None: @@ -135,6 +140,7 @@ class HMAC_SHA512_256(HMAC): class GMAC(GHASH): TYPE = 20 + digest_size = 16 def __init__(self, key, data): super().__init__(None, data) diff --git a/attic/remote.py b/attic/remote.py index d7e1ecfa..8d0456cd 100644 --- a/attic/remote.py +++ b/attic/remote.py @@ -89,7 +89,7 @@ class RepositoryServer: def negotiate(self, versions): return 1 - 
def open(self, path, create=False): + def open(self, path, create=False, key_size=None): path = os.fsdecode(path) if path.startswith('/~'): path = path[1:] @@ -100,8 +100,8 @@ class RepositoryServer: break else: raise PathNotAllowed(path) - self.repository = Repository(path, create) - return self.repository.id + self.repository = Repository(path, create, key_size=key_size) + return self.repository.id, self.repository.key_size class RemoteRepository: @@ -112,7 +112,7 @@ class RemoteRepository: def __init__(self, name): self.name = name - def __init__(self, location, create=False): + def __init__(self, location, create=False, key_size=None): self.location = location self.preload_ids = [] self.msgid = 0 @@ -144,7 +144,7 @@ class RemoteRepository: version = self.call('negotiate', 1) if version != 1: raise Exception('Server insisted on using unsupported protocol version %d' % version) - self.id = self.call('open', location.path, create) + self.id, self.key_size = self.call('open', location.path, create, key_size) def __del__(self): self.close() @@ -299,7 +299,7 @@ class RepositoryCache: def initialize(self): self.tmppath = tempfile.mkdtemp() - self.index = NSIndex() + self.index = NSIndex(key_size=self.repository.key_size) self.data_fd = open(os.path.join(self.tmppath, 'data'), 'a+b') def cleanup(self): diff --git a/attic/repository.py b/attic/repository.py index 255382f6..74ede6fa 100644 --- a/attic/repository.py +++ b/attic/repository.py @@ -46,22 +46,23 @@ class Repository: class ObjectNotFound(Error): """Object with key {} not found in repository {}.""" - def __init__(self, path, create=False, exclusive=False): + def __init__(self, path, create=False, exclusive=False, key_size=None): self.path = path self.io = None self.lock = None self.index = None self._active_txn = False if create: - self.create(path) + self.create(path, key_size) self.open(path, exclusive) def __del__(self): self.close() - def create(self, path): + def create(self, path, key_size): """Create a 
new empty repository at `path` """ + assert key_size is not None if os.path.exists(path) and (not os.path.isdir(path) or os.listdir(path)): raise self.AlreadyExists(path) if not os.path.exists(path): @@ -74,6 +75,7 @@ class Repository: config.set('repository', 'version', '1') config.set('repository', 'segments_per_dir', self.DEFAULT_SEGMENTS_PER_DIR) config.set('repository', 'max_segment_size', self.DEFAULT_MAX_SEGMENT_SIZE) + config.set('repository', 'key_size', key_size) config.set('repository', 'id', hexlify(os.urandom(32)).decode('ascii')) with open(os.path.join(path, 'config'), 'w') as fd: config.write(fd) @@ -109,10 +111,12 @@ class Repository: if 'repository' not in self.config.sections() or self.config.getint('repository', 'version') != 1: raise self.InvalidRepository(path) self.lock = UpgradableLock(os.path.join(path, 'config'), exclusive) + # legacy attic repositories always have key size 32B (256b) + self.key_size = self.config.getint('repository', 'key_size', fallback=32) self.max_segment_size = self.config.getint('repository', 'max_segment_size') self.segments_per_dir = self.config.getint('repository', 'segments_per_dir') self.id = unhexlify(self.config.get('repository', 'id').strip()) - self.io = LoggedIO(self.path, self.max_segment_size, self.segments_per_dir) + self.io = LoggedIO(self.path, self.max_segment_size, self.segments_per_dir, self.key_size) def close(self): if self.lock: @@ -132,8 +136,9 @@ class Repository: def open_index(self, transaction_id): if transaction_id is None: - return NSIndex() - return NSIndex.read((os.path.join(self.path, 'index.%d') % transaction_id).encode('utf-8')) + return NSIndex(key_size=self.key_size) + return NSIndex.read((os.path.join(self.path, 'index.%d') % transaction_id).encode('utf-8'), + key_size=self.key_size) def prepare_txn(self, transaction_id, do_cleanup=True): self._active_txn = True @@ -382,8 +387,6 @@ class LoggedIO: header_fmt = struct.Struct('= MIN_BUCKETS idx_name = tempfile.NamedTemporaryFile() - 
idx = NSIndex() + idx = NSIndex(key_size=32) idx.write(idx_name.name) initial_size = os.path.getsize(idx_name.name) self.assert_equal(len(idx), 0) @@ -67,7 +67,7 @@ class HashIndexTestCase(AtticTestCase): self.assert_equal(initial_size, os.path.getsize(idx_name.name)) def test_iteritems(self): - idx = NSIndex() + idx = NSIndex(key_size=32) for x in range(100): idx[bytes('%-0.32d' % x, 'ascii')] = x, x all = list(idx.iteritems()) diff --git a/attic/testsuite/key.py b/attic/testsuite/key.py index 11c87100..371ba8d8 100644 --- a/attic/testsuite/key.py +++ b/attic/testsuite/key.py @@ -19,20 +19,20 @@ class KeyTestCase(AtticTestCase): keyfile2_key_file = """ ATTIC KEY 0000000000000000000000000000000000000000000000000000000000000000 -hqlhbGdvcml0aG2kZ21hY6RoYXNo2gAgY7jwSMnBwpqD3Fk/aAdSAgAAAAAAAAAAAAAAAA -AAAACqaXRlcmF0aW9uc84AAYagp3ZlcnNpb24BpHNhbHTaACASqCq8G6a/K/W+bOrNDW65 -Sfl9ZHrTEtq6l+AMUmATxKRkYXRh2gDQuDVCijDzeZDD/JLPrOtsQL/vrZEWvCt5RuXFOt -tTZfbCJDmv2nt4KvYToVsp82pffZDcsLaOOBCTGurpkdefsdiLMgGiLlbrsXlES+fbKZfq -Tx2x2DjU4L1bFxuoypDIdk2lB3S98ZpFZ6yd1XtDBVTQ34FZTlDXIZ5HyuxAJBrGKYj/Un -Fk24N5xSoPfeQhE3r7hqEsGwEEX0s6sg0LHMGyc4xSBb13iZxWRlSdnvBC7teIeevhT/DU -scOrlrX0NO2eqe5jQF+zj1Q6OtBvRA== +hqRzYWx02gAgA1l4jfyv22y6U/mxxDT8HodSWAcX0g3nOESrQcNnBsundmVyc2lvbgGqaX +RlcmF0aW9uc84AAYagqWFsZ29yaXRobaRnbWFjpGhhc2iw7eaB54JssAOnM1S4S9CeTaRk +YXRh2gDQzmuyg3iYjMeTLObY+ybI+QfngB+5mmHeEAfBa42fuEZgqM3rYyMj2XfgvamF+O +0asvhEyy9om190FaOxQ4RiiTMNqSP0FKLmd1i5ZyDMfRyp7JbscRFs9Ryk28yXWkv0MgQy +EAYlaycY+6lWdRSgEPxidyPl9t9dr2AI/UuiQytwqmcmXgWD6Px6wgpOS/4AcRmEvDqIIl +Rc2xsu+RevGAxk5rnrIIRPr7WB5R2cinzEn9ylDgBDt9LZbq706ELgtwVTnjWB8FBTPwVI +vLTTXQ== """.strip() keyfile2_cdata = unhexlify(re.sub('\W', '', """ - 0393c420fd6e9ac6f8c49c4789d1c924c14c309200000000000000000000000000000000 - 9600001402c41000000000000000000000000000000000c2c4071352fe2286e3ed + 0393c4102e5ce8f5e9477c9e4ce2de453121aa139600001402c41000000000000000000000000000000000 + c2c407b0147a64a379d1 """)) - keyfile2_id = 
unhexlify('d4954bcf8d7b1762356e91b2611c727800000000000000000000000000000000') + keyfile2_id = unhexlify('dd9451069663931c8abd85452d016733') def setUp(self): self.tmppath = tempfile.mkdtemp() @@ -60,7 +60,7 @@ scOrlrX0NO2eqe5jQF+zj1Q6OtBvRA== def test_plaintext(self): key = PlaintextKey.create(None, self.MockArgs()) data = b'foo' - self.assert_equal(hexlify(key.id_hash(data)), b'4c9137bc0dd3ddb31de4e138a49d7eb300000000000000000000000000000000') + self.assert_equal(hexlify(key.id_hash(data)), b'4c9137bc0dd3ddb31de4e138a49d7eb3') self.assert_equal(data, key.decrypt(key.id_hash(data), key.encrypt(data))) def test_keyfile(self): @@ -115,5 +115,5 @@ scOrlrX0NO2eqe5jQF+zj1Q6OtBvRA== self.assert_equal(key.enc_key, key2.enc_key) self.assert_equal(key.chunk_seed, key2.chunk_seed) data = b'foo' - self.assert_equal(hexlify(key.id_hash(data)), b'a409d69859b8a07625f066e42cde050100000000000000000000000000000000') + self.assert_equal(hexlify(key.id_hash(data)), b'a409d69859b8a07625f066e42cde0501') self.assert_equal(data, key2.decrypt(key2.id_hash(data), key.encrypt(data))) diff --git a/attic/testsuite/repository.py b/attic/testsuite/repository.py index 18946a3e..c6ea1edc 100644 --- a/attic/testsuite/repository.py +++ b/attic/testsuite/repository.py @@ -10,13 +10,14 @@ from attic.testsuite import AtticTestCase class RepositoryTestCaseBase(AtticTestCase): + key_size = 32 - def open(self, create=False): - return Repository(os.path.join(self.tmppath, 'repository'), create=create) + def open(self, create=False, key_size=None): + return Repository(os.path.join(self.tmppath, 'repository'), create=create, key_size=key_size) def setUp(self): self.tmppath = tempfile.mkdtemp() - self.repository = self.open(create=True) + self.repository = self.open(create=True, key_size=self.key_size) def tearDown(self): self.repository.close() @@ -207,7 +208,8 @@ class RepositoryCheckTestCase(RepositoryTestCaseBase): return sorted(int(n) for n in os.listdir(os.path.join(self.tmppath, 'repository', 'data', 
'0')) if n.isdigit())[-1] def open_index(self): - return NSIndex.read(os.path.join(self.tmppath, 'repository', 'index.{}'.format(self.get_head()))) + return NSIndex.read(os.path.join(self.tmppath, 'repository', 'index.{}'.format(self.get_head())), + key_size=self.key_size) def corrupt_object(self, id_): idx = self.open_index() @@ -315,8 +317,9 @@ class RepositoryCheckTestCase(RepositoryTestCaseBase): class RemoteRepositoryTestCase(RepositoryTestCase): - def open(self, create=False): - return RemoteRepository(Location('__testsuite__:' + os.path.join(self.tmppath, 'repository')), create=create) + def open(self, create=False, key_size=None): + return RemoteRepository(Location('__testsuite__:' + os.path.join(self.tmppath, 'repository')), + create=create, key_size=key_size) def test_invalid_rpc(self): self.assert_raises(InvalidRPCMethod, lambda: self.repository.call('__init__', None)) @@ -324,5 +327,6 @@ class RemoteRepositoryTestCase(RepositoryTestCase): class RemoteRepositoryCheckTestCase(RepositoryCheckTestCase): - def open(self, create=False): - return RemoteRepository(Location('__testsuite__:' + os.path.join(self.tmppath, 'repository')), create=create) + def open(self, create=False, key_size=None): + return RemoteRepository(Location('__testsuite__:' + os.path.join(self.tmppath, 'repository')), + create=create, key_size=key_size) From 5ae3fa29273ac5a996de997aa3e28b66eecd92c2 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 23 Mar 2015 14:01:47 +0100 Subject: [PATCH 44/58] 128bit increment_iv implementation/pack/unpack remove strange "lower 64bits of IV" stuff, 64bit pack/unpack. while the 64bit counter 295EB "limit" was maybe high enough, always dealing with dissecting and reassembling the IV was a pain. 
--- attic/crypto.pyx | 32 +++++++++++++++++++++++++++++--- attic/key.py | 21 +-------------------- attic/testsuite/archiver.py | 4 ++-- attic/testsuite/crypto.py | 26 ++++++++++++++++++++++---- attic/testsuite/key.py | 1 - 5 files changed, 54 insertions(+), 30 deletions(-) diff --git a/attic/crypto.pyx b/attic/crypto.pyx index 4e8e259a..4a82d8c6 100644 --- a/attic/crypto.pyx +++ b/attic/crypto.pyx @@ -55,11 +55,21 @@ cdef extern from "openssl/evp.h": import struct _int = struct.Struct('>I') -_long = struct.Struct('>Q') +_2long = struct.Struct('>QQ') bytes_to_int = lambda x, offset=0: _int.unpack_from(x, offset)[0] -bytes_to_long = lambda x, offset=0: _long.unpack_from(x, offset)[0] -long_to_bytes = lambda x: _long.pack(x) + + +def bytes16_to_int(b, offset=0): + h, l = _2long.unpack_from(b, offset) + return (h << 64) + l + + +def int_to_bytes16(i): + max_uint64 = 0xffffffffffffffff + l = i & max_uint64 + h = (i >> 64) & max_uint64 + return _2long.pack(h, l) def num_aes_blocks(length): @@ -69,6 +79,22 @@ def num_aes_blocks(length): return (length + 15) // 16 +def increment_iv(iv, amount): + """ + increment the given IV considering that bytes of data was + encrypted based on it. In CTR / GCM mode, the IV is just a counter and + must never repeat. 
+ + :param iv: current IV, 16 bytes (128 bit) + :param amount: amount of data (in bytes) that was encrypted + :return: new IV, 16 bytes (128 bit) + """ + iv = bytes16_to_int(iv) + iv += num_aes_blocks(amount) + iv = int_to_bytes16(iv) + return iv + + def pbkdf2_sha256(password, salt, iterations, size): """Password based key derivation function 2 (RFC2898) """ diff --git a/attic/key.py b/attic/key.py index 5617c105..007e2aa4 100644 --- a/attic/key.py +++ b/attic/key.py @@ -17,7 +17,7 @@ except ImportError: lzma = None from attic.crypto import pbkdf2_sha256, get_random_bytes, AES, AES_CTR_MODE, AES_GCM_MODE, \ - bytes_to_long, long_to_bytes, bytes_to_int, num_aes_blocks + bytes_to_int, increment_iv from attic.helpers import IntegrityError, get_keys_dir, Error # we do not store the full IV on disk, as the upper 8 bytes are expected to be @@ -216,25 +216,6 @@ class PLAIN: return data -def increment_iv(iv, amount): - """ - increment the given IV considering that bytes of data was - encrypted based on it. In CTR / GCM mode, the IV is just a counter and - must never repeat. 
- - :param iv: current IV, 16 bytes (128 bit) - :param amount: amount of data (in bytes) that was encrypted - :return: new IV, 16 bytes (128 bit) - """ - # TODO: code assumes that the last 8 bytes are enough, the upper 8 always zero - iv_last8 = iv[8:] - current_iv = bytes_to_long(iv_last8) - new_iv = current_iv + num_aes_blocks(amount) - iv_last8 = long_to_bytes(new_iv) - iv = PREFIX + iv_last8 - return iv - - def get_aad(meta): """get additional authenticated data for AEAD ciphers""" if meta.legacy: diff --git a/attic/testsuite/archiver.py b/attic/testsuite/archiver.py index 96fa476e..24fc9237 100644 --- a/attic/testsuite/archiver.py +++ b/attic/testsuite/archiver.py @@ -11,7 +11,7 @@ from hashlib import sha256 from attic import xattr from attic.archive import Archive, ChunkBuffer from attic.archiver import Archiver -from attic.crypto import bytes_to_long, num_aes_blocks +from attic.crypto import bytes16_to_int, num_aes_blocks from attic.helpers import Manifest from attic.key import parser from attic.remote import RemoteRepository, PathNotAllowed @@ -385,7 +385,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): seen.add(hash) mac, meta, data = parser(data) num_blocks = num_aes_blocks(len(data)) - nonce = bytes_to_long(meta.iv, 8) + nonce = bytes16_to_int(meta.iv) for counter in range(nonce, nonce + num_blocks): self.assert_not_in(counter, used) used.add(counter) diff --git a/attic/testsuite/crypto.py b/attic/testsuite/crypto.py index bf3fe912..1c1795be 100644 --- a/attic/testsuite/crypto.py +++ b/attic/testsuite/crypto.py @@ -1,7 +1,7 @@ from binascii import hexlify from attic.testsuite import AtticTestCase from attic.crypto import pbkdf2_sha256, get_random_bytes, AES, AES_GCM_MODE, AES_CTR_MODE, \ - bytes_to_long, bytes_to_int, long_to_bytes + bytes_to_int, bytes16_to_int, int_to_bytes16, increment_iv class CryptoTestCase(AtticTestCase): @@ -9,9 +9,27 @@ class CryptoTestCase(AtticTestCase): def test_bytes_to_int(self): 
self.assert_equal(bytes_to_int(b'\0\0\0\1'), 1) - def test_bytes_to_long(self): - self.assert_equal(bytes_to_long(b'\0\0\0\0\0\0\0\1'), 1) - self.assert_equal(long_to_bytes(1), b'\0\0\0\0\0\0\0\1') + def test_bytes16_to_int(self): + i, b = 1, b'\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1' + self.assert_equal(bytes16_to_int(b), i) + self.assert_equal(int_to_bytes16(i), b) + i, b = (1 << 64) + 2, b'\0\0\0\0\0\0\0\1\0\0\0\0\0\0\0\2' + self.assert_equal(bytes16_to_int(b), i) + self.assert_equal(int_to_bytes16(i), b) + + def test_increment_iv(self): + tests = [ + # iv, amount, iv_expected + (0, 0, 0), + (0, 15, 1), + (0, 16, 1), + (0, 17, 2), + (0xffffffffffffffff, 32, 0x10000000000000001), + ] + for iv, amount, iv_expected in tests: + iv = int_to_bytes16(iv) + iv_expected = int_to_bytes16(iv_expected) + self.assert_equal(increment_iv(iv, amount), iv_expected) def test_pbkdf2_sha256(self): self.assert_equal(hexlify(pbkdf2_sha256(b'password', b'salt', 1, 32)), diff --git a/attic/testsuite/key.py b/attic/testsuite/key.py index 371ba8d8..8651a978 100644 --- a/attic/testsuite/key.py +++ b/attic/testsuite/key.py @@ -3,7 +3,6 @@ import re import shutil import tempfile from binascii import hexlify -from attic.crypto import bytes_to_long from attic.testsuite import AtticTestCase from attic.key import PlaintextKey, PassphraseKey, KeyfileKey, COMPR_DEFAULT, increment_iv from attic.helpers import Location, unhexlify From 626f9182ad025dd99a4e8c0b8325c2735727cb1b Mon Sep 17 00:00:00 2001 From: Hubert Hesse Date: Wed, 25 Mar 2015 12:58:06 +0100 Subject: [PATCH 45/58] Add Travis CI badge to README --- README.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.rst b/README.rst index 0903f04f..15ba4463 100644 --- a/README.rst +++ b/README.rst @@ -1,3 +1,5 @@ +|build| + What is Attic? -------------- Attic is a deduplicating backup program. The main goal of Attic is to provide @@ -55,3 +57,7 @@ The tests are in the attic/testsuite package. 
To run the test suite use the following command:: $ fakeroot -u python -m attic.testsuite.run + +.. |build| image:: https://travis-ci.org/attic/merge.svg + :alt: Build Status + :target: https://travis-ci.org/attic/merge From 7c6677934504456562facc7980a4a4d880828e60 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 27 Mar 2015 08:39:38 +0100 Subject: [PATCH 46/58] use blosc for multithreaded and fast compression, including lz4, lz4hc --- attic/archiver.py | 5 +++ attic/key.py | 82 +++++++++++++++++++++++++++++++++++++++++++++++ setup.py | 9 +++++- 3 files changed, 95 insertions(+), 1 deletion(-) diff --git a/attic/archiver.py b/attic/archiver.py index 0f4620d1..f4b531b7 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -517,6 +517,11 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") - 00 no compression - 01..09 zlib levels 1..9 (1 means low compression, 9 max. compression) - 10..19 lzma levels 0..9 (0 means low compression, 9 max. compression) + - 20..29 lz4 (blosc) levels 0..9 (0 = no, 9 = max. compression) + - 30..39 lz4hc (blosc) levels 0..9 (0 = no, 9 = max. compression) + - 40..49 blosclz (blosc) levels 0..9 (0 = no, 9 = max. compression) + - 50..59 snappy (blosc) levels 0..9 (0 = no, 9 = max. compression) + - 60..69 zlib (blosc) levels 0..9 (0 = no, 9 = max. 
compression) --cipher METHODs (default: %02d or %02d) diff --git a/attic/key.py b/attic/key.py index 007e2aa4..8b3b3d40 100644 --- a/attic/key.py +++ b/attic/key.py @@ -16,6 +16,11 @@ except ImportError: except ImportError: lzma = None +try: + import blosc +except ImportError: + blosc = None + from attic.crypto import pbkdf2_sha256, get_random_bytes, AES, AES_CTR_MODE, AES_GCM_MODE, \ bytes_to_int, increment_iv from attic.helpers import IntegrityError, get_keys_dir, Error @@ -195,6 +200,68 @@ class LzmaCompressor(object): # uses 10..19 in the mapping return lzma.decompress(data) +class BLOSCCompressor(object): + TYPE = 0 # override in subclass + LEVELS = range(10) + CNAME = '' # override in subclass + + def __init__(self): + if blosc is None: + raise NotImplemented("%s compression needs blosc from PyPi" % self.CNAME) + if self.CNAME not in blosc.compressor_list(): + raise NotImplemented("%s compression is not supported by blosc" % self.CNAME) + blosc.set_blocksize(8192) # maybe 8 threads processing a 64KB chunks -> 8KB block + + def _get_level(self): + raise NotImplemented + + def compress(self, data): + return blosc.compress(bytes(data), 1, cname=self.CNAME, clevel=self._get_level()) + + def decompress(self, data): + return blosc.decompress(data) + + +class LZ4Compressor(BLOSCCompressor): + TYPE = 20 + CNAME = 'lz4' + + def _get_level(self): + return self.TYPE - LZ4Compressor.TYPE + + +class LZ4HCCompressor(BLOSCCompressor): + TYPE = 30 + CNAME = 'lz4hc' + + def _get_level(self): + return self.TYPE - LZ4HCCompressor.TYPE + + +class BLOSCLZCompressor(BLOSCCompressor): + TYPE = 40 + CNAME = 'blosclz' + + def _get_level(self): + return self.TYPE - BLOSCLZCompressor.TYPE + + +class SnappyCompressor(BLOSCCompressor): + TYPE = 50 + CNAME = 'snappy' + + def _get_level(self): + return self.TYPE - SnappyCompressor.TYPE + + +class BLOSCZlibCompressor(BLOSCCompressor): + TYPE = 60 + CNAME = 'zlib' + + def _get_level(self): + return self.TYPE - BLOSCZlibCompressor.TYPE + + # 
default is optimized for speed COMPR_DEFAULT = NullCompressor.TYPE # no compression @@ -581,6 +648,21 @@ for level in ZlibCompressor.LEVELS: for preset in LzmaCompressor.PRESETS: compressor_mapping[LzmaCompressor.TYPE + preset] = \ type('LzmaCompressorPreset%d' % preset, (LzmaCompressor, ), dict(TYPE=LzmaCompressor.TYPE + preset)) +for level in LZ4Compressor.LEVELS: + compressor_mapping[LZ4Compressor.TYPE + level] = \ + type('LZ4CompressorLevel%d' % level, (LZ4Compressor, ), dict(TYPE=LZ4Compressor.TYPE + level)) +for level in LZ4HCCompressor.LEVELS: + compressor_mapping[LZ4HCCompressor.TYPE + level] = \ + type('LZ4HCCompressorLevel%d' % level, (LZ4HCCompressor, ), dict(TYPE=LZ4HCCompressor.TYPE + level)) +for level in BLOSCLZCompressor.LEVELS: + compressor_mapping[BLOSCLZCompressor.TYPE + level] = \ + type('BLOSCLZCompressorLevel%d' % level, (BLOSCLZCompressor, ), dict(TYPE=BLOSCLZCompressor.TYPE + level)) +for level in SnappyCompressor.LEVELS: + compressor_mapping[SnappyCompressor.TYPE + level] = \ + type('SnappyCompressorLevel%d' % level, (SnappyCompressor, ), dict(TYPE=SnappyCompressor.TYPE + level)) +for level in BLOSCZlibCompressor.LEVELS: + compressor_mapping[BLOSCZlibCompressor.TYPE + level] = \ + type('BLOSCZlibCompressorLevel%d' % level, (BLOSCZlibCompressor, ), dict(TYPE=BLOSCZlibCompressor.TYPE + level)) # overwrite 0 with NullCompressor compressor_mapping[NullCompressor.TYPE] = NullCompressor diff --git a/setup.py b/setup.py index e2970208..b8fbe4b8 100644 --- a/setup.py +++ b/setup.py @@ -98,10 +98,16 @@ elif platform == 'Darwin': # msgpack pure python data corruption was fixed in 0.4.6. # Also, we might use some rather recent API features. 
-install_requires=['msgpack-python>=0.4.6'] +install_requires=['msgpack-python>=0.4.6', 'blosc>1.2.4'] if sys.version_info < (3, 3): install_requires.append('backports.lzma') +dependency_links=[ + # blosc 1.2.5 is not released yet, but needed for set_blocksize so we can + # get parallel compression even if only feeding it 64KB chunks of data... + "https://github.com/Blosc/python-blosc/archive/master.zip#egg=blosc-1.2.5" +] + setup( name='Attic', version=versioneer.get_version(), @@ -129,4 +135,5 @@ setup( cmdclass=cmdclass, ext_modules=ext_modules, install_requires=install_requires, + dependency_links=dependency_links, ) From 32fa83c1568390c8ab2d7284eb4bbd8cf99a2eb6 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 27 Mar 2015 21:40:11 +0100 Subject: [PATCH 47/58] blosc: increase blocksize to 16kiB --- attic/key.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/attic/key.py b/attic/key.py index 8b3b3d40..623c6eeb 100644 --- a/attic/key.py +++ b/attic/key.py @@ -210,7 +210,7 @@ class BLOSCCompressor(object): raise NotImplemented("%s compression needs blosc from PyPi" % self.CNAME) if self.CNAME not in blosc.compressor_list(): raise NotImplemented("%s compression is not supported by blosc" % self.CNAME) - blosc.set_blocksize(8192) # maybe 8 threads processing a 64KB chunks -> 8KB block + blosc.set_blocksize(16384) # 16kiB is the minimum, so 64kiB are enough for 4 threads def _get_level(self): raise NotImplemented From 88b62282ad3181e0a116836e5387c805d9f4f0ff Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 31 Mar 2015 03:05:52 +0200 Subject: [PATCH 48/58] add (hmac-)sha1/sha512 --- attic/archiver.py | 4 +++ attic/key.py | 62 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 65 insertions(+), 1 deletion(-) diff --git a/attic/archiver.py b/attic/archiver.py index f4b531b7..c214d3b3 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -534,8 +534,12 @@ Type "Yes I am sure" if you understand this and want to 
continue.\n""") - 00 sha256 (simple hash, no MAC, faster on 32bit CPU) - 01 sha512-256 (simple hash, no MAC, faster on 64bit CPU) - 02 ghash (simple hash, no MAC, fastest on CPUs with AES-GCM support) + - 03 sha1 (simple hash, no MAC, fastest on CPUs without AES-GCM support) + - 04 sha512 (simple hash, no MAC, faster on 64bit CPU) - 10 hmac-sha256 (MAC, faster on 32bit CPU) - 11 hmac-sha512-256 (MAC, faster on 64bit CPU) + - 13 hmac-sha1 (MAC, fastest on CPUs without AES-GCM support) + - 14 hmac-sha512 (MAC, faster on 64bit CPU) - 20 gmac (MAC, fastest on CPUs with AES-GCM support) """ % (COMPR_DEFAULT, PLAIN_DEFAULT, CIPHER_DEFAULT, HASH_DEFAULT, MAC_DEFAULT)) subparser = subparsers.add_parser('init', parents=[common_parser], diff --git a/attic/key.py b/attic/key.py index 623c6eeb..7b603954 100644 --- a/attic/key.py +++ b/attic/key.py @@ -5,7 +5,7 @@ import msgpack import textwrap from collections import namedtuple import hmac -from hashlib import sha256, sha512 +from hashlib import sha1, sha256, sha512 import zlib try: @@ -123,6 +123,46 @@ class GHASH: return hash +class SHA1(object): # note: can't subclass sha1 + TYPE = 3 + digest_size = 20 + + def __init__(self, key, data=b''): + # signature is like for a MAC, we ignore the key as this is a simple hash + if key is not None: + raise Exception("use a HMAC if you have a key") + self.h = sha1(data) + + def update(self, data): + self.h.update(data) + + def digest(self): + return self.h.digest() + + def hexdigest(self): + return self.h.hexdigest() + + +class SHA512(object): # note: can't subclass sha512 + TYPE = 4 + digest_size = 64 + + def __init__(self, key, data=b''): + # signature is like for a MAC, we ignore the key as this is a simple hash + if key is not None: + raise Exception("use a HMAC if you have a key") + self.h = sha512(data) + + def update(self, data): + self.h.update(data) + + def digest(self): + return self.h.digest() + + def hexdigest(self): + return self.h.hexdigest() + + class HMAC_SHA256(HMAC): 
TYPE = 10 digest_size = 32 @@ -143,6 +183,26 @@ class HMAC_SHA512_256(HMAC): super().__init__(key, data, sha512_256) +class HMAC_SHA1(HMAC): + TYPE = 13 + digest_size = 20 + + def __init__(self, key, data): + if key is None: + raise Exception("do not use HMAC if you don't have a key") + super().__init__(key, data, sha1) + + +class HMAC_SHA512(HMAC): + TYPE = 14 + digest_size = 64 + + def __init__(self, key, data): + if key is None: + raise Exception("do not use HMAC if you don't have a key") + super().__init__(key, data, sha512) + + class GMAC(GHASH): TYPE = 20 digest_size = 16 From 2541d684b4810a7f9e0dcc966ed30bdfc1bfcad8 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 31 Mar 2015 03:29:16 +0200 Subject: [PATCH 49/58] update README do not duplicate requirements there, but rather point to setup.py --- README.rst | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/README.rst b/README.rst index 15ba4463..38c46997 100644 --- a/README.rst +++ b/README.rst @@ -24,7 +24,7 @@ Space efficient storage Optional data encryption All data can be protected using 256-bit AES encryption and data integrity - and authenticity is verified using HMAC-SHA256. + and authenticity is verified using a MAC (message authentication code). Off-site backups Attic can store data on any remote host accessible over SSH. This is @@ -36,9 +36,10 @@ Backups mountable as filesystems What do I need? --------------- -Attic requires Python 3.2 or above to work. Besides Python, Attic also requires -msgpack-python and sufficiently recent OpenSSL (>= 1.0.0). +Attic requires Python 3.2 or above to work. +Attic also requires a sufficiently recent OpenSSL (>= 1.0.0). In order to mount archives as filesystems, llfuse is required. +For other python requirements, please see setup.py install_requires. How do I install it? -------------------- @@ -56,7 +57,7 @@ Where are the tests? The tests are in the attic/testsuite package. 
To run the test suite use the following command:: - $ fakeroot -u python -m attic.testsuite.run + $ fakeroot -u tox # you need to have tox installed .. |build| image:: https://travis-ci.org/attic/merge.svg :alt: Build Status From 853d12c02177d004e5920480f4d5e786987e96a7 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 31 Mar 2015 19:32:42 +0200 Subject: [PATCH 50/58] fix: add (hmac-)sha1/sha512 to maccer mapping --- attic/key.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/attic/key.py b/attic/key.py index 7b603954..70876066 100644 --- a/attic/key.py +++ b/attic/key.py @@ -736,12 +736,16 @@ keyer_mapping = { maccer_mapping = { # simple hashes, not MACs (but MAC-like class __init__ method signature): + SHA1.TYPE: SHA1, SHA256.TYPE: SHA256, SHA512_256.TYPE: SHA512_256, + SHA512.TYPE: SHA512, GHASH.TYPE: GHASH, # MACs: + HMAC_SHA1.TYPE: HMAC_SHA1, HMAC_SHA256.TYPE: HMAC_SHA256, HMAC_SHA512_256.TYPE: HMAC_SHA512_256, + HMAC_SHA512.TYPE: HMAC_SHA512, GMAC.TYPE: GMAC, } From 14a20516d30b455c766f288091ec5b3cdcdd5b32 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 1 Apr 2015 00:05:42 +0200 Subject: [PATCH 51/58] simplify / dedup hashers and maccers --- attic/key.py | 122 +++++++++++++++++++-------------------------------- 1 file changed, 45 insertions(+), 77 deletions(-) diff --git a/attic/key.py b/attic/key.py index 70876066..b528156f 100644 --- a/attic/key.py +++ b/attic/key.py @@ -62,25 +62,19 @@ class sha512_256(object): # note: can't subclass sha512 return new -class HMAC(hmac.HMAC): - """Workaround a bug in Python < 3.4 Where HMAC does not accept memoryviews - """ - def update(self, msg): - self.inner.update(msg) - - # HASH / MAC stuff below all has a mac-like interface, so it can be used in the same way. 
# special case: hashes do not use keys (and thus, do not sign/authenticate) -class SHA256(object): # note: can't subclass sha256 - TYPE = 0 - digest_size = 32 +class HASH: # note: we can't subclass sha1/sha256/sha512 + TYPE = 0 # override in subclass + digest_size = 0 # override in subclass + hash_func = None # override in subclass def __init__(self, key, data=b''): # signature is like for a MAC, we ignore the key as this is a simple hash if key is not None: raise Exception("use a HMAC if you have a key") - self.h = sha256(data) + self.h = self.hash_func(data) def update(self, data): self.h.update(data) @@ -92,126 +86,100 @@ class SHA256(object): # note: can't subclass sha256 return self.h.hexdigest() -class SHA512_256(sha512_256): - """sha512, but digest truncated to 256bit - faster than sha256 on 64bit platforms""" +class SHA256(HASH): + TYPE = 0 + digest_size = 32 + hash_func = sha256 + + +class SHA512_256(HASH): TYPE = 1 digest_size = 32 - - def __init__(self, key, data): - # signature is like for a MAC, we ignore the key as this is a simple hash - if key is not None: - raise Exception("use a HMAC if you have a key") - super().__init__(data) + hash_func = sha512_256 class GHASH: TYPE = 2 digest_size = 16 - def __init__(self, key, data): + def __init__(self, key, data=b''): # signature is like for a MAC, we ignore the key as this is a simple hash if key is not None: raise Exception("use a MAC if you have a key") - self.key = b'\0' * 32 - self.data = data + self.mac_cipher = AES(mode=AES_GCM_MODE, is_encrypt=True, key=b'\0' * 32, iv=b'\0' * 16) + if data: + self.update(data) + + def update(self, data): + # GMAC = aes-gcm with all data as AAD, no data as to-be-encrypted data + self.mac_cipher.add(bytes(data)) def digest(self): - mac_cipher = AES(mode=AES_GCM_MODE, is_encrypt=True, key=self.key, iv=b'\0' * 16) - # GMAC = aes-gcm with all data as AAD, no data as to-be-encrypted data - mac_cipher.add(bytes(self.data)) - hash, _ = 
mac_cipher.compute_mac_and_encrypt(b'') + hash, _ = self.mac_cipher.compute_mac_and_encrypt(b'') return hash -class SHA1(object): # note: can't subclass sha1 +class SHA1(HASH): TYPE = 3 digest_size = 20 - - def __init__(self, key, data=b''): - # signature is like for a MAC, we ignore the key as this is a simple hash - if key is not None: - raise Exception("use a HMAC if you have a key") - self.h = sha1(data) - - def update(self, data): - self.h.update(data) - - def digest(self): - return self.h.digest() - - def hexdigest(self): - return self.h.hexdigest() + hash_func = sha1 -class SHA512(object): # note: can't subclass sha512 +class SHA512(HASH): TYPE = 4 digest_size = 64 + hash_func = sha512 - def __init__(self, key, data=b''): - # signature is like for a MAC, we ignore the key as this is a simple hash - if key is not None: - raise Exception("use a HMAC if you have a key") - self.h = sha512(data) - def update(self, data): - self.h.update(data) +class HMAC(hmac.HMAC): + TYPE = 0 # override in subclass + digest_size = 0 # override in subclass + hash_func = None # override in subclass - def digest(self): - return self.h.digest() + def __init__(self, key, data): + if key is None: + raise Exception("do not use HMAC if you don't have a key") + super().__init__(key, data, self.hash_func) - def hexdigest(self): - return self.h.hexdigest() + def update(self, msg): + # Workaround a bug in Python < 3.4 Where HMAC does not accept memoryviews + self.inner.update(msg) class HMAC_SHA256(HMAC): TYPE = 10 digest_size = 32 - - def __init__(self, key, data): - if key is None: - raise Exception("do not use HMAC if you don't have a key") - super().__init__(key, data, sha256) + hash_func = sha256 class HMAC_SHA512_256(HMAC): TYPE = 11 digest_size = 32 - - def __init__(self, key, data): - if key is None: - raise Exception("do not use HMAC if you don't have a key") - super().__init__(key, data, sha512_256) + hash_func = sha512_256 class HMAC_SHA1(HMAC): TYPE = 13 digest_size = 20 - - def 
__init__(self, key, data): - if key is None: - raise Exception("do not use HMAC if you don't have a key") - super().__init__(key, data, sha1) + hash_func = sha1 class HMAC_SHA512(HMAC): TYPE = 14 digest_size = 64 - - def __init__(self, key, data): - if key is None: - raise Exception("do not use HMAC if you don't have a key") - super().__init__(key, data, sha512) + hash_func = sha512 class GMAC(GHASH): TYPE = 20 digest_size = 16 - def __init__(self, key, data): - super().__init__(None, data) + def __init__(self, key, data=b''): if key is None: raise Exception("do not use GMAC if you don't have a key") - self.key = key + self.mac_cipher = AES(mode=AES_GCM_MODE, is_encrypt=True, key=key, iv=b'\0' * 16) + if data: + self.update(data) # defaults are optimized for speed on modern CPUs with AES hw support From c453a31798cb73612d59514ba84931bd7919ed64 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 1 Apr 2015 00:33:20 +0200 Subject: [PATCH 52/58] key module: minor cleanup of zero-PREFIX handling/comment --- attic/key.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/attic/key.py b/attic/key.py index b528156f..a0e6f60c 100644 --- a/attic/key.py +++ b/attic/key.py @@ -25,10 +25,6 @@ from attic.crypto import pbkdf2_sha256, get_random_bytes, AES, AES_CTR_MODE, AES bytes_to_int, increment_iv from attic.helpers import IntegrityError, get_keys_dir, Error -# we do not store the full IV on disk, as the upper 8 bytes are expected to be -# zero anyway as the full IV is a 128bit counter. PREFIX are the upper 8 bytes, -# stored_iv are the lower 8 Bytes. 
-PREFIX = b'\0' * 8 Meta = namedtuple('Meta', 'compr_type, key_type, mac_type, cipher_type, iv, legacy') @@ -188,7 +184,6 @@ MAC_DEFAULT = GMAC.TYPE # compressor classes, all same interface -# special case: zlib level 0 is "no compression" class NullCompressor(object): # uses 0 in the mapping TYPE = 0 @@ -762,7 +757,9 @@ def legacy_parser(all_data, key_type): # all rather hardcoded mac_type = HMAC_SHA256.TYPE mac = all_data[offset:offset+32] cipher_type = AES_CTR_HMAC.TYPE - iv = PREFIX + all_data[offset+32:offset+40] + # legacy attic did not store the full IV on disk, as the upper 8 bytes + # are expected to be zero anyway as the full IV is a 128bit counter. + iv = b'\0' * 8 + all_data[offset+32:offset+40] data = all_data[offset+40:] meta = Meta(compr_type=6, key_type=key_type, mac_type=mac_type, cipher_type=cipher_type, iv=iv, legacy=True) From c3e84cc490dcc90a100d5a061afb13adb2d2f597 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 1 Apr 2015 01:14:51 +0200 Subject: [PATCH 53/58] tox: make pip install process dependency_links needed as long as we need the git version of python-blosc, should be fixed soon by python-blosc 1.2.5. 
--- tox.ini | 1 + 1 file changed, 1 insertion(+) diff --git a/tox.ini b/tox.ini index 327ddf0c..4d21df90 100644 --- a/tox.ini +++ b/tox.ini @@ -4,6 +4,7 @@ envlist = py32, py33, py34 [testenv] # Change dir to avoid import problem changedir = docs +install_command = pip install --process-dependency-links {opts} {packages} commands = {envpython} -m attic.testsuite.run -bv [] [testenv:py32] From 64813ebd7436d3907b56258710f94bbb5488024a Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 1 Apr 2015 12:44:05 +0200 Subject: [PATCH 54/58] fix travis install to process dependency links also currently needed for installing blosc from github repo --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 7e3471b1..137319eb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,6 +7,6 @@ python: install: - "sudo apt-get install -y libacl1-dev" - "pip install --use-mirrors Cython" - - "pip install -e ." + - "pip install --process-dependency-links -e ." # command to run tests script: fakeroot -u python -m attic.testsuite.run -vb From 2d148750e1eb70e5df57b0408d24fd73d8455937 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 15 Apr 2015 17:21:52 +0200 Subject: [PATCH 55/58] fix BufferFull msgpack exception now that a unittest produced the maximum chunk size possible, it crashed and made clear that the unpacker limits were not yet correct. giving the unpacker now a buffer of CHUNK_MAX + 1000 (the latter is a generous amount for the little chunk metadata we have). 
--- attic/key.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/attic/key.py b/attic/key.py index a0e6f60c..e4b9ab58 100644 --- a/attic/key.py +++ b/attic/key.py @@ -25,6 +25,10 @@ from attic.crypto import pbkdf2_sha256, get_random_bytes, AES, AES_CTR_MODE, AES bytes_to_int, increment_iv from attic.helpers import IntegrityError, get_keys_dir, Error +# TODO fix cyclic import: +#from attic.archive import CHUNK_MAX +CHUNK_MAX = 10 * 1024 * 1024 + Meta = namedtuple('Meta', 'compr_type, key_type, mac_type, cipher_type, iv, legacy') @@ -783,13 +787,12 @@ def parser03(all_data): # new & flexible meta is a Meta namedtuple and contains all required information about data. data is maybe compressed (see meta) and maybe encrypted (see meta). """ - max_len = 10000000 # XXX formula? unpacker = msgpack.Unpacker( use_list=False, # avoid memory allocation issues causes by tampered input data. - max_buffer_size=max_len, # does not work in 0.4.6 unpackb C implementation + max_buffer_size=CHUNK_MAX + 1000, # does not work in 0.4.6 unpackb C implementation max_array_len=10, # meta_tuple - max_bin_len=max_len, # data + max_bin_len=CHUNK_MAX, # data max_str_len=0, # not used yet max_map_len=0, # not used yet max_ext_len=0, # not used yet From e01fbf94121883ffae8a3cf002114eda2d30bfcb Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 15 Apr 2015 23:18:53 +0200 Subject: [PATCH 56/58] remove blosc pull from github, use 1.2.5 pypi release --- setup.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/setup.py b/setup.py index b8fbe4b8..f5bad1f0 100644 --- a/setup.py +++ b/setup.py @@ -98,16 +98,10 @@ elif platform == 'Darwin': # msgpack pure python data corruption was fixed in 0.4.6. # Also, we might use some rather recent API features. 
-install_requires=['msgpack-python>=0.4.6', 'blosc>1.2.4'] +install_requires=['msgpack-python>=0.4.6', 'blosc>=1.2.5'] if sys.version_info < (3, 3): install_requires.append('backports.lzma') -dependency_links=[ - # blosc 1.2.5 is not released yet, but needed for set_blocksize so we can - # get parallel compression even if only feeding it 64KB chunks of data... - "https://github.com/Blosc/python-blosc/archive/master.zip#egg=blosc-1.2.5" -] - setup( name='Attic', version=versioneer.get_version(), @@ -135,5 +129,4 @@ setup( cmdclass=cmdclass, ext_modules=ext_modules, install_requires=install_requires, - dependency_links=dependency_links, ) From f30ecf338e8392168f6a12b5384418fc7876498d Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 19 Apr 2015 20:25:37 +0200 Subject: [PATCH 57/58] updated CHANGES-merge-all.txt --- CHANGES-merge-all.txt | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/CHANGES-merge-all.txt b/CHANGES-merge-all.txt index 6824a783..abd6943b 100644 --- a/CHANGES-merge-all.txt +++ b/CHANGES-merge-all.txt @@ -1,23 +1,53 @@ -Stuff in merge-all and not in merge minus minor changes: +Important note about "merge-all" branch +======================================= + +Goal of the rather experimental "merge-all" branch is to merge all the stuff: +- changesets from "merge" branch +- features that DO IMPACT compatibility with original attic +- etc. + +THERE IS NO GUARANTEE THAT IT IS COMPATIBLE WITH ORIGINAL ATTIC OR PREVIOUS +"merge-all" CODE nor THAT YOU CAN SWITCH BACK AND FORTH BETWEEN ORIGINAL CODE +OR PREVIOUS "merge-all" CODE AND THIS CODE WITHIN THE SAME REPOSITORY WITHOUT +ENCOUNTERING SEVERE ISSUES. + +Please also see the LICENSE for more information. + + +Stuff in "merge-all" that is not in "merge" minus minor changes +=============================================================== added tuning docs attic init --compression NN --cipher NN --mac NN ...
(see attic init --help) -new hashes: sha512_256 +new hashes: sha512-256 + sha512 + sha1 ghash (default) new MACs: hmac-sha512-256 + hmac-sha512 + hmac-sha1 gmac (default) new ciphers: aes256-ctr + hmac-sha512-256 aes256-gcm (default) new compression: no compression (default) zlib level 1..9 (previously, level 6 was hardcoded) lzma preset 0..9 + lz4 (and other) multi-threaded algos from blosc library source: more flexible type 0x03 header format, allowing to give hash algo, compression algo and level, encryption algo, key type. +IV is stored in full length, length of stored IV/MAC/hash is flexible. +Indexing key size (key = id_hash()) is flexible and configurable per repo. + source: less hardcoding, numeric offsets / lengths source: flexible hashing, compression, encryption, key dispatching + +Important changes and fixes within "merge-all" branch +===================================================== +Apr 15 2015: fix BufferFull msgpack exception + From 1f64ce7709b028e86fc09b4a3b77ff86593bc76c Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 10 May 2015 17:44:15 +0200 Subject: [PATCH 58/58] fix traceback in --cipher 1 mode (aes-ctr + hmac) --- attic/key.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/attic/key.py b/attic/key.py index fd8698e7..b6597a48 100644 --- a/attic/key.py +++ b/attic/key.py @@ -338,12 +338,12 @@ class AES_CTR_HMAC: _, data = self.enc_cipher.compute_mac_and_encrypt(data) self.enc_iv = increment_iv(meta.iv, len(data)) aad = get_aad(meta) - mac = HMAC(self.hmac_key, aad + data, sha256).digest() # XXX mac / hash flexibility + mac = HMAC_SHA256(self.hmac_key, aad + data).digest() # XXX mac / hash flexibility return mac, data def check_mac_and_decrypt(self, mac, meta, data): aad = get_aad(meta) - if HMAC(self.hmac_key, aad + data, sha256).digest() != mac: + if HMAC_SHA256(self.hmac_key, aad + data).digest() != mac: # XXX mac / hash flexibility raise IntegrityError('Encryption envelope checksum mismatch') 
self.dec_cipher.reset(iv=meta.iv) data = self.dec_cipher.check_mac_and_decrypt(None, data)