1
0
Fork 0
mirror of https://github.com/borgbackup/borg.git synced 2024-12-26 09:47:58 +00:00

Chunker: fix wrong EOF assumption[1], check for return type[2]

[1]
This worked incidentally because OSes tend to return at least one page
worth of data when EOF is not reached. Increasing WINDOW_SIZE beyond
the page size might have led to data loss.

[2]
If read() of the passed Python object returned something that is not bytes,
PyBytes_Size returns -1 (ssize_t), which becomes a very large number for
memcpy()'s size_t.
This commit is contained in:
Marian Beermann 2016-03-31 22:03:17 +02:00
parent bf208479d7
commit 061bf59d5d
2 changed files with 18 additions and 3 deletions

View file

@ -174,6 +174,10 @@ chunker_fill(Chunker *c)
return 0; return 0;
} }
n = PyBytes_Size(data); n = PyBytes_Size(data);
if(PyErr_Occurred()) {
// we wanted bytes(), but got something else
return 0;
}
if(n) { if(n) {
memcpy(c->data + c->position + c->remaining, PyBytes_AsString(data), n); memcpy(c->data + c->position + c->remaining, PyBytes_AsString(data), n);
c->remaining += n; c->remaining += n;
@ -200,12 +204,12 @@ chunker_process(Chunker *c)
PyErr_SetString(PyExc_Exception, "chunkifier byte count mismatch"); PyErr_SetString(PyExc_Exception, "chunkifier byte count mismatch");
return NULL; return NULL;
} }
if(c->remaining <= window_size) { while(c->remaining <= window_size && !c->eof) {
if(!chunker_fill(c)) { if(!chunker_fill(c)) {
return NULL; return NULL;
} }
} }
if(c->remaining < window_size) { if(c->eof) {
c->done = 1; c->done = 1;
if(c->remaining) { if(c->remaining) {
c->bytes_yielded += c->remaining; c->bytes_yielded += c->remaining;

View file

@ -1,7 +1,7 @@
from io import BytesIO from io import BytesIO
from ..chunker import Chunker, buzhash, buzhash_update from ..chunker import Chunker, buzhash, buzhash_update
from ..archive import CHUNK_MAX_EXP from ..archive import CHUNK_MAX_EXP, CHUNKER_PARAMS
from . import BaseTestCase from . import BaseTestCase
@ -29,3 +29,14 @@ def test_buzhash(self):
self.assert_equal(buzhash(b'abcdefghijklmnop', 1), buzhash_update(buzhash(b'Xabcdefghijklmno', 1), ord('X'), ord('p'), 16, 1)) self.assert_equal(buzhash(b'abcdefghijklmnop', 1), buzhash_update(buzhash(b'Xabcdefghijklmno', 1), ord('X'), ord('p'), 16, 1))
# Test with more than 31 bytes to make sure our barrel_shift macro works correctly # Test with more than 31 bytes to make sure our barrel_shift macro works correctly
self.assert_equal(buzhash(b'abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz', 0), 566521248) self.assert_equal(buzhash(b'abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz', 0), 566521248)
def test_small_reads(self):
    """Chunk a reader that hands out a single byte per read() call.

    The fake file ignores the requested size: each read() first drops one
    byte from its backlog and then returns at most one byte of what is
    left.  Starting from 21 bytes, that is 20 one-byte reads followed by
    an empty read, so the joined chunks must equal 20 bytes of b'a'.
    """
    class SmallReadFile:
        # One byte more than expected, because read() discards a byte
        # before serving from the backlog.
        backlog = b'a' * (20 + 1)

        def read(self, nbytes):
            # nbytes is deliberately ignored; never return more than one byte.
            self.backlog = self.backlog[:-1]
            return self.backlog[:1]

    chunker = Chunker(0, *CHUNKER_PARAMS)
    chunks = chunker.chunkify(SmallReadFile())
    reconstructed = b''.join(chunks)
    assert reconstructed == b'a' * 20