crc32: deal with unaligned buffer, tests, fixes #3317

This fixes only the (generic) slice-by-8 crc32 implementation;
it is assumed that CPUs supporting CLMUL can also efficiently
and correctly deal with unaligned accesses.

slice-by-8 is used e.g. on ARM CPUs, which might not (efficiently)
support unaligned memory access, leading to bus errors or low
performance.
This commit is contained in:
Thomas Waldmann 2017-11-13 16:47:03 +01:00
parent b3c11dee6c
commit f9cd6f7512
2 changed files with 29 additions and 8 deletions

View File

@ -330,12 +330,28 @@ const uint32_t Crc32Lookup[8][256] =
uint32_t crc32_slice_by_8(const void* data, size_t length, uint32_t previousCrc32)
{
uint32_t crc = ~previousCrc32; // same as previousCrc32 ^ 0xFFFFFFFF
const uint32_t* current = (const uint32_t*) data;
const uint32_t* current;
const uint8_t* currentChar;
// enabling optimization (at least -O2) automatically unrolls the inner for-loop
const size_t Unroll = 4;
const size_t BytesAtOnce = 8 * Unroll;
const uint8_t* currentChar;
currentChar = (const uint8_t*) data;
// wanted: 32 bit / 4 Byte alignment, compute leading, unaligned bytes length
uintptr_t unaligned_length = (4 - (((uintptr_t) currentChar) & 3)) & 3;
// process unaligned bytes, if any (standard algorithm)
while ((length != 0) && (unaligned_length != 0))
{
crc = (crc >> 8) ^ Crc32Lookup[0][(crc & 0xFF) ^ *currentChar++];
length--;
unaligned_length--;
}
// pointer points to 32bit aligned address now
current = (const uint32_t*) currentChar;
// process 4x eight bytes at once (Slicing-by-8)
while (length >= BytesAtOnce)

View File

@ -14,12 +14,17 @@ if checksums.have_clmul:
@pytest.mark.parametrize('implementation', crc32_implementations)
def test_crc32(implementation):
# This includes many critical values, like zero length, 3/4/5, 6/7/8 and so on which are near and on
# alignment boundaries. This is of course just a sanity check ie. "did it compile all right?".
data = os.urandom(256)
# This includes many critical values, like misc. length and misc. aligned start addresses.
data = os.urandom(300)
mv = memoryview(data)
initial_crc = 0x12345678
for i in range(0, 256):
d = data[:i]
for start in range(0, 4): # 4B / int32 alignment, head processing
for length in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73,
127, 128, 129, 130, 131, 132, 133, 134, 135,
255, 256, 257, ]:
d = mv[start:start+length]
assert zlib.crc32(d, initial_crc) == implementation(d, initial_crc)