From 3d3f3500f9af67f5dfa8fec59f759c111a4aa009 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 13 Nov 2017 16:47:03 +0100 Subject: [PATCH] crc32: deal with unaligned buffer, tests, fixes #3317 fixing only the (generic) slice-by-8 crc32 implementation, it is assumed that CPUs supporting CLMUL can also efficiently and correctly deal with unaligned accesses. slice-by-8 is used e.g. on ARM cpus and they might not (efficiently) support unaligned memory access, leading to bus errors or low performance. (cherry picked from commit f9cd6f7512a7fed267c349c9de2d857591a74f91) --- src/borg/algorithms/crc32_slice_by_8.c | 20 ++++++++++++++++++-- src/borg/testsuite/checksums.py | 17 +++++++++++------ 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/src/borg/algorithms/crc32_slice_by_8.c b/src/borg/algorithms/crc32_slice_by_8.c index dcfd8b8f9..b289fbb87 100644 --- a/src/borg/algorithms/crc32_slice_by_8.c +++ b/src/borg/algorithms/crc32_slice_by_8.c @@ -330,12 +330,28 @@ const uint32_t Crc32Lookup[8][256] = uint32_t crc32_slice_by_8(const void* data, size_t length, uint32_t previousCrc32) { uint32_t crc = ~previousCrc32; // same as previousCrc32 ^ 0xFFFFFFFF - const uint32_t* current = (const uint32_t*) data; + + const uint32_t* current; + const uint8_t* currentChar; // enabling optimization (at least -O2) automatically unrolls the inner for-loop const size_t Unroll = 4; const size_t BytesAtOnce = 8 * Unroll; - const uint8_t* currentChar; + + currentChar = (const uint8_t*) data; + + // wanted: 32 bit / 4 Byte alignment, compute leading, unaligned bytes length + uintptr_t unaligned_length = (4 - (((uintptr_t) currentChar) & 3)) & 3; + // process unaligned bytes, if any (standard algorithm) + while ((length != 0) && (unaligned_length != 0)) + { + crc = (crc >> 8) ^ Crc32Lookup[0][(crc & 0xFF) ^ *currentChar++]; + length--; + unaligned_length--; + } + + // pointer points to 32bit aligned address now + current = (const uint32_t*) currentChar; // process 4x eight bytes at once (Slicing-by-8) while (length >= BytesAtOnce) diff --git a/src/borg/testsuite/checksums.py b/src/borg/testsuite/checksums.py index 5b0d9fb9d..ca02d709c 100644 --- a/src/borg/testsuite/checksums.py +++ b/src/borg/testsuite/checksums.py @@ -14,13 +14,18 @@ @pytest.mark.parametrize('implementation', crc32_implementations) def test_crc32(implementation): - # This includes many critical values, like zero length, 3/4/5, 6/7/8 and so on which are near and on - # alignment boundaries. This is of course just a sanity check ie. "did it compile all right?". - data = os.urandom(256) + # This includes many critical values, like misc. length and misc. aligned start addresses. + data = os.urandom(300) + mv = memoryview(data) initial_crc = 0x12345678 - for i in range(0, 256): - d = data[:i] - assert zlib.crc32(d, initial_crc) == implementation(d, initial_crc) + for start in range(0, 4): # 4B / int32 alignment, head processing + for length in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, + 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, + 127, 128, 129, 130, 131, 132, 133, 134, 135, + 255, 256, 257, ]: + d = mv[start:start+length] + assert zlib.crc32(d, initial_crc) == implementation(d, initial_crc) def test_xxh64():