Chunker: fix wrong EOF assumption [1], check read()'s return type [2]

[1] This only worked incidentally because OSes tend to return at least one page worth of data per read() as long as EOF has not been reached. Increasing WINDOW_SIZE beyond the page size might have led to data loss.

[2] If read() on the passed Python object returned something other than bytes, PyBytes_Size() returned -1 (as ssize_t), which becomes a very large number when used as memcpy()'s size_t length.
commit 061bf59d5d
parent bf208479d7
2 changed files with 18 additions and 3 deletions
@@ -174,6 +174,10 @@ chunker_fill(Chunker *c)
         return 0;
     }
     n = PyBytes_Size(data);
+    if(PyErr_Occurred()) {
+        // we wanted bytes(), but got something else
+        return 0;
+    }
     if(n) {
         memcpy(c->data + c->position + c->remaining, PyBytes_AsString(data), n);
         c->remaining += n;
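For illustration of [2] (not part of this commit): a minimal Python sketch of the failure mode the new check guards against, using a file-like object whose read() returns str instead of bytes. The borg.archive / borg.chunker import paths are assumed from the relative imports in the test file below, StrReadFile is a made-up helper, and the exact exception type is whatever PyBytes_Size() sets, so a broad except is used.

from borg.archive import CHUNKER_PARAMS
from borg.chunker import Chunker

class StrReadFile:
    # broken file-like object: read() returns str, not bytes
    def read(self, nbytes):
        return 'not bytes'

try:
    list(Chunker(0, *CHUNKER_PARAMS).chunkify(StrReadFile()))
except Exception as exc:
    # with this change the bad return value surfaces as an exception
    # instead of feeding a negative, wrapped-around size into memcpy()
    print('rejected non-bytes read():', exc)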
@@ -200,12 +204,12 @@ chunker_process(Chunker *c)
         PyErr_SetString(PyExc_Exception, "chunkifier byte count mismatch");
         return NULL;
     }
-    if(c->remaining <= window_size) {
+    while(c->remaining <= window_size && !c->eof) {
         if(!chunker_fill(c)) {
             return NULL;
         }
     }
-    if(c->remaining < window_size) {
+    if(c->eof) {
         c->done = 1;
         if(c->remaining) {
             c->bytes_yielded += c->remaining;
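The while loop above encodes the corrected assumption from [1]: a short read does not mean EOF, only an empty read does. A pure-Python model of that fill logic, illustrative only (the function name, read size and 4096-byte window are made up, not borg's actual constants):

import io

def fill_until_window(fileobj, window_size=4096, read_size=64 * 1024):
    # keep reading until the rolling-hash window is covered or EOF is hit
    buf = b''
    eof = False
    while len(buf) <= window_size and not eof:
        data = fileobj.read(read_size)
        if not isinstance(data, bytes):
            # corresponds to the new PyErr_Occurred()/PyBytes_Size() check
            raise TypeError('read() must return bytes, got %s' % type(data).__name__)
        if not data:
            # only an empty read signals EOF; a short read does not
            eof = True
        buf += data
    return buf, eof

print(fill_until_window(io.BytesIO(b'x' * 10)))   # -> (b'xxxxxxxxxx', True)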
@@ -1,7 +1,7 @@
 from io import BytesIO
 
 from ..chunker import Chunker, buzhash, buzhash_update
-from ..archive import CHUNK_MAX_EXP
+from ..archive import CHUNK_MAX_EXP, CHUNKER_PARAMS
 from . import BaseTestCase
 
 
@@ -29,3 +29,14 @@ def test_buzhash(self):
         self.assert_equal(buzhash(b'abcdefghijklmnop', 1), buzhash_update(buzhash(b'Xabcdefghijklmno', 1), ord('X'), ord('p'), 16, 1))
         # Test with more than 31 bytes to make sure our barrel_shift macro works correctly
         self.assert_equal(buzhash(b'abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz', 0), 566521248)
+
+    def test_small_reads(self):
+        class SmallReadFile:
+            input = b'a' * (20 + 1)
+
+            def read(self, nbytes):
+                self.input = self.input[:-1]
+                return self.input[:1]
+
+        reconstructed = b''.join(Chunker(0, *CHUNKER_PARAMS).chunkify(SmallReadFile()))
+        assert reconstructed == b'a' * 20
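Outside the test suite, the same short-read scenario can be exercised roughly like this; a sketch that assumes a borg checkout importable as borg.chunker / borg.archive (matching the relative imports above), with OneByteReadFile as a made-up helper:

from borg.archive import CHUNKER_PARAMS
from borg.chunker import Chunker

class OneByteReadFile:
    # returns at most one byte per read(); an empty result only at real EOF
    def __init__(self, data):
        self.data = data

    def read(self, nbytes):
        chunk, self.data = self.data[:1], self.data[1:]
        return chunk

payload = b'a' * 20
chunks = Chunker(0, *CHUNKER_PARAMS).chunkify(OneByteReadFile(payload))
assert b''.join(chunks) == payload   # before the EOF fix, trailing data could be dropped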