mirror of https://github.com/borgbackup/borg.git
chunker speedup and regression test
This commit is contained in:
parent
29aec480c0
commit
708fa83c1d
|
@ -247,7 +247,7 @@ static PyObject *
|
|||
chunker_process(Chunker *c)
|
||||
{
|
||||
uint32_t sum, chunk_mask = c->chunk_mask;
|
||||
size_t n = 0, old_last, min_size = c->min_size, window_size = c->window_size;
|
||||
size_t n, old_last, min_size = c->min_size, window_size = c->window_size;
|
||||
|
||||
if(c->done) {
|
||||
if(c->bytes_read == c->bytes_yielded)
|
||||
|
@ -282,15 +282,19 @@ chunker_process(Chunker *c)
|
|||
*/
|
||||
c->position += min_size;
|
||||
c->remaining -= min_size;
|
||||
n += min_size;
|
||||
sum = buzhash(c->data + c->position, window_size, c->table);
|
||||
while(c->remaining > c->window_size && (sum & chunk_mask)) {
|
||||
sum = buzhash_update(sum, c->data[c->position],
|
||||
c->data[c->position + window_size],
|
||||
window_size, c->table);
|
||||
c->position++;
|
||||
c->remaining--;
|
||||
n++;
|
||||
uint8_t *p = c->data + c->position;
|
||||
uint8_t *stop_at = p + c->remaining - window_size;
|
||||
size_t did_bytes;
|
||||
while (p < stop_at && (sum & chunk_mask)) {
|
||||
sum = buzhash_update(sum, p[0], p[window_size],
|
||||
window_size, c->table);
|
||||
p++;
|
||||
}
|
||||
did_bytes = p - (c->data + c->position);
|
||||
c->position += did_bytes;
|
||||
c->remaining -= did_bytes;
|
||||
if(c->remaining <= window_size) {
|
||||
if(!chunker_fill(c)) {
|
||||
return NULL;
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
from io import BytesIO
|
||||
from binascii import unhexlify
|
||||
|
||||
from ..chunker import Chunker
|
||||
from ..crypto.low_level import blake2b_256
|
||||
from ..constants import * # NOQA
|
||||
from . import BaseTestCase
|
||||
|
||||
|
||||
class ChunkerRegressionTestCase(BaseTestCase):
    """Regression test pinning the chunker's chunk-point decisions.

    Chunk boundaries must stay bit-for-bit identical across chunker
    rewrites/optimisations, otherwise existing repositories would
    re-chunk and bloat on the next backup.
    """

    def test_chunkpoints_unchanged(self):
        def twist(size):
            # Deterministic pseudo-random fixture bytes from a simple LCG,
            # so the test data is identical on every platform and run.
            state = 1
            buf = bytearray(size)
            for idx in range(size):
                state = (state * 1103515245 + 12345) & 0x7FFFFFFF
                buf[idx] = state & 0xFF
            return buf

        data = twist(100000)

        # Sweep a grid of chunker parameters; iteration order matters
        # because every per-run digest feeds the final combined digest.
        param_grid = (
            (winsize, minexp, maxexp, maskbits, seed)
            for winsize in (65, 129, HASH_WINDOW_SIZE, 7351)
            for minexp in (4, 6, 7, 11, 12)
            for maxexp in (15, 17)
            if minexp < maxexp  # skip degenerate min/max combinations
            for maskbits in (4, 7, 10, 12)
            for seed in (1849058162, 1234567653)
        )

        runs = []
        for winsize, minexp, maxexp, maskbits, seed in param_grid:
            chunker = Chunker(seed, minexp, maxexp, maskbits, winsize)
            chunk_hashes = [blake2b_256(b'', chunk)
                            for chunk in chunker.chunkify(BytesIO(data), -1)]
            runs.append(blake2b_256(b'', b''.join(chunk_hashes)))

        # The "correct" digest below matches the existing chunker behaviour.
        # Future chunker optimisations must not change it, or existing
        # repositories will bloat.
        overall_hash = blake2b_256(b'', b''.join(runs))
        self.assert_equal(overall_hash, unhexlify("b559b0ac8df8daaa221201d018815114241ea5c6609d98913cd2246a702af4e3"))
|
Loading…
Reference in New Issue