From c5bd7f2216598d561c9a86136216f2f3d978076d Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Sun, 27 Mar 2016 01:12:23 +0100 Subject: [PATCH] Replace stdlib hmac with OpenSSL, zero-copy decrypt AESKeyBase.decrypt makes fewer copies of the data; specifically, data[41:], which copied the payload, is gone. This is possible since this commit makes our lil' crypto API compatible with simple buffers. In Cython there is a syntax for creating memoryviews from buffer-enabled objects; however, it doesn't support read-only buffers. ro_buffer gets this job done, though, and also does the proper type checking (by PyBUF_SIMPLE). Note: msgpack doesn't support memoryviews. Otherwise we could avoid copying the en/de-crypted output with the final "return out[:ctl]". This commit leads to a solid 10-15 % increase in performance of hash-lists and extract. The hmac_sha256() function releases the GIL. --- borg/crypto.pyx | 40 ++++++++++++++++++++++++++++++++++++++-- borg/key.py | 21 +++++++++++---------- borg/testsuite/crypto.py | 30 ++++++++++++++++++++++++++++-- 3 files changed, 77 insertions(+), 14 deletions(-) diff --git a/borg/crypto.pyx b/borg/crypto.pyx index 172fe0745..8bee39fe4 100644 --- a/borg/crypto.pyx +++ b/borg/crypto.pyx @@ -3,9 +3,11 @@ This could be replaced by PyCrypto maybe? 
""" from libc.stdlib cimport malloc, free +from cpython.buffer cimport PyBUF_SIMPLE, PyObject_GetBuffer, PyBuffer_Release API_VERSION = 2 + cdef extern from "openssl/rand.h": int RAND_bytes(unsigned char *buf, int num) @@ -35,6 +37,14 @@ cdef extern from "openssl/evp.h": int EVP_EncryptFinal_ex(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl) int EVP_DecryptFinal_ex(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl) + EVP_MD *EVP_sha256() nogil + + +cdef extern from "openssl/hmac.h": + unsigned char *HMAC(const EVP_MD *evp_md, + const void *key, int key_len, + const unsigned char *data, int data_len, + unsigned char *md, unsigned int *md_len) nogil import struct @@ -46,6 +56,12 @@ bytes_to_long = lambda x, offset=0: _long.unpack_from(x, offset)[0] long_to_bytes = lambda x: _long.pack(x) +cdef Py_buffer ro_buffer(object data) except *: + cdef Py_buffer view + PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) + return view + + def num_aes_blocks(int length): """Return the number of AES blocks required to encrypt/decrypt *length* bytes of data. Note: this is only correct for modes without padding, like AES-CTR. 
@@ -95,6 +111,7 @@ cdef class AES: return self.ctx.iv[:16] def encrypt(self, data): + cdef Py_buffer data_buf = ro_buffer(data) cdef int inl = len(data) cdef int ctl = 0 cdef int outl = 0 @@ -103,7 +120,7 @@ cdef class AES: if not out: raise MemoryError try: - if not EVP_EncryptUpdate(&self.ctx, out, &outl, data, inl): + if not EVP_EncryptUpdate(&self.ctx, out, &outl, data_buf.buf, inl): raise Exception('EVP_EncryptUpdate failed') ctl = outl if not EVP_EncryptFinal_ex(&self.ctx, out+ctl, &outl): @@ -112,8 +129,10 @@ cdef class AES: return out[:ctl] finally: free(out) + PyBuffer_Release(&data_buf) def decrypt(self, data): + cdef Py_buffer data_buf = ro_buffer(data) cdef int inl = len(data) cdef int ptl = 0 cdef int outl = 0 @@ -124,7 +143,7 @@ cdef class AES: if not out: raise MemoryError try: - if not EVP_DecryptUpdate(&self.ctx, out, &outl, data, inl): + if not EVP_DecryptUpdate(&self.ctx, out, &outl, data_buf.buf, inl): raise Exception('EVP_DecryptUpdate failed') ptl = outl if EVP_DecryptFinal_ex(&self.ctx, out+ptl, &outl) <= 0: @@ -136,3 +155,20 @@ cdef class AES: return out[:ptl] finally: free(out) + PyBuffer_Release(&data_buf) + + +def hmac_sha256(key, data): + md = bytes(32) + cdef Py_buffer data_buf = ro_buffer(data) + cdef const unsigned char *key_ptr = key + cdef int key_len = len(key) + cdef unsigned char *md_ptr = md + try: + with nogil: + rc = HMAC(EVP_sha256(), key_ptr, key_len, data_buf.buf, data_buf.len, md_ptr, NULL) + if rc != md_ptr: + raise Exception('HMAC(EVP_sha256) failed') + finally: + PyBuffer_Release(&data_buf) + return md diff --git a/borg/key.py b/borg/key.py index 113214ab6..eb452b77a 100644 --- a/borg/key.py +++ b/borg/key.py @@ -4,14 +4,14 @@ import getpass import os import sys import textwrap -from hmac import HMAC, compare_digest +from hmac import compare_digest from hashlib import sha256, pbkdf2_hmac from .helpers import IntegrityError, get_keys_dir, Error, yes from .logger import create_logger logger = create_logger() -from .crypto 
import AES, bytes_to_long, long_to_bytes, bytes_to_int, num_aes_blocks +from .crypto import AES, bytes_to_long, long_to_bytes, bytes_to_int, num_aes_blocks, hmac_sha256 from .compress import Compressor, COMPR_BUFFER import msgpack @@ -126,28 +126,29 @@ class AESKeyBase(KeyBase): def id_hash(self, data): """Return HMAC hash using the "id" HMAC key """ - return HMAC(self.id_key, data, sha256).digest() + return hmac_sha256(self.id_key, data) def encrypt(self, data): data = self.compressor.compress(data) self.enc_cipher.reset() data = b''.join((self.enc_cipher.iv[8:], self.enc_cipher.encrypt(data))) - hmac = HMAC(self.enc_hmac_key, data, sha256).digest() + hmac = hmac_sha256(self.enc_hmac_key, data) return b''.join((self.TYPE_STR, hmac, data)) def decrypt(self, id, data): if not (data[0] == self.TYPE or data[0] == PassphraseKey.TYPE and isinstance(self, RepoKey)): raise IntegrityError('Invalid encryption envelope') - hmac_given = memoryview(data)[1:33] - hmac_computed = memoryview(HMAC(self.enc_hmac_key, memoryview(data)[33:], sha256).digest()) + data_view = memoryview(data) + hmac_given = data_view[1:33] + hmac_computed = memoryview(hmac_sha256(self.enc_hmac_key, data_view[33:])) if not compare_digest(hmac_computed, hmac_given): raise IntegrityError('Encryption envelope checksum mismatch') self.dec_cipher.reset(iv=PREFIX + data[33:41]) - data = self.compressor.decompress(self.dec_cipher.decrypt(data[41:])) + data = self.compressor.decompress(self.dec_cipher.decrypt(data_view[41:])) if id: hmac_given = id - hmac_computed = HMAC(self.id_key, data, sha256).digest() + hmac_computed = hmac_sha256(self.id_key, data) if not compare_digest(hmac_computed, hmac_given): raise IntegrityError('Chunk id verification failed') return data @@ -322,14 +323,14 @@ class KeyfileKeyBase(AESKeyBase): assert d[b'algorithm'] == b'sha256' key = passphrase.kdf(d[b'salt'], d[b'iterations'], 32) data = AES(is_encrypt=False, key=key).decrypt(d[b'data']) - if HMAC(key, data, sha256).digest() == 
d[b'hash']: + if hmac_sha256(key, data) == d[b'hash']: return data def encrypt_key_file(self, data, passphrase): salt = os.urandom(32) iterations = 100000 key = passphrase.kdf(salt, iterations, 32) - hash = HMAC(key, data, sha256).digest() + hash = hmac_sha256(key, data) cdata = AES(is_encrypt=True, key=key).encrypt(data) d = { 'version': 1, diff --git a/borg/testsuite/crypto.py b/borg/testsuite/crypto.py index 2d74493d6..9609e259a 100644 --- a/borg/testsuite/crypto.py +++ b/borg/testsuite/crypto.py @@ -1,6 +1,6 @@ -from binascii import hexlify +from binascii import hexlify, unhexlify -from ..crypto import AES, bytes_to_long, bytes_to_int, long_to_bytes +from ..crypto import AES, bytes_to_long, bytes_to_int, long_to_bytes, hmac_sha256 from . import BaseTestCase @@ -28,3 +28,29 @@ class CryptoTestCase(BaseTestCase): pdata = aes.decrypt(cdata) self.assert_equal(data, pdata) self.assert_equal(bytes_to_long(aes.iv, 8), 2) + + def test_hmac_sha256(self): + # RFC 4231 test vectors + key = b'\x0b' * 20 + # Also test that this works with memory views + data = memoryview(unhexlify('4869205468657265')) + hmac = unhexlify('b0344c61d8db38535ca8afceaf0bf12b' + '881dc200c9833da726e9376c2e32cff7') + assert hmac_sha256(key, data) == hmac + key = unhexlify('4a656665') + data = unhexlify('7768617420646f2079612077616e7420' + '666f72206e6f7468696e673f') + hmac = unhexlify('5bdcc146bf60754e6a042426089575c7' + '5a003f089d2739839dec58b964ec3843') + assert hmac_sha256(key, data) == hmac + key = b'\xaa' * 20 + data = b'\xdd' * 50 + hmac = unhexlify('773ea91e36800e46854db8ebd09181a7' + '2959098b3ef8c122d9635514ced565fe') + assert hmac_sha256(key, data) == hmac + key = unhexlify('0102030405060708090a0b0c0d0e0f10' + '111213141516171819') + data = b'\xcd' * 50 + hmac = unhexlify('82558a389a443c0ea4cc819899f2083a' + '85f0faa3e578f8077a2e3ff46729665b') + assert hmac_sha256(key, data) == hmac