From 061bf59d5d1371b68a43daf295551415f06f8b6c Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Thu, 31 Mar 2016 22:03:17 +0200 Subject: [PATCH] Chunker: fix wrong EOF assumption[1], check for return type[2] [1] This worked incidentally because OSes tend to return at least one page worth of data when EOF is not reached. Increasing WINDOW_SIZE beyond the page size might have lead to data loss. [2] If read() of the passed Python object returned something not-bytes, PyBytes_Size returns -1 (ssize_t) which becomes a very larger number for memcpy()s size_t. --- borg/_chunker.c | 8 ++++++-- borg/testsuite/chunker.py | 13 ++++++++++++- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/borg/_chunker.c b/borg/_chunker.c index 87ec31576..5a1ecfa15 100644 --- a/borg/_chunker.c +++ b/borg/_chunker.c @@ -174,6 +174,10 @@ chunker_fill(Chunker *c) return 0; } n = PyBytes_Size(data); + if(PyErr_Occurred()) { + // we wanted bytes(), but got something else + return 0; + } if(n) { memcpy(c->data + c->position + c->remaining, PyBytes_AsString(data), n); c->remaining += n; @@ -200,12 +204,12 @@ chunker_process(Chunker *c) PyErr_SetString(PyExc_Exception, "chunkifier byte count mismatch"); return NULL; } - if(c->remaining <= window_size) { + while(c->remaining <= window_size && !c->eof) { if(!chunker_fill(c)) { return NULL; } } - if(c->remaining < window_size) { + if(c->eof) { c->done = 1; if(c->remaining) { c->bytes_yielded += c->remaining; diff --git a/borg/testsuite/chunker.py b/borg/testsuite/chunker.py index 9b12901eb..96e3111a2 100644 --- a/borg/testsuite/chunker.py +++ b/borg/testsuite/chunker.py @@ -1,7 +1,7 @@ from io import BytesIO from ..chunker import Chunker, buzhash, buzhash_update -from ..archive import CHUNK_MAX_EXP +from ..archive import CHUNK_MAX_EXP, CHUNKER_PARAMS from . import BaseTestCase @@ -29,3 +29,14 @@ class ChunkerTestCase(BaseTestCase): self.assert_equal(buzhash(b'abcdefghijklmnop', 1), buzhash_update(buzhash(b'Xabcdefghijklmno', 1), ord('X'), ord('p'), 16, 1)) # Test with more than 31 bytes to make sure our barrel_shift macro works correctly self.assert_equal(buzhash(b'abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz', 0), 566521248) + + def test_small_reads(self): + class SmallReadFile: + input = b'a' * (20 + 1) + + def read(self, nbytes): + self.input = self.input[:-1] + return self.input[:1] + + reconstructed = b''.join(Chunker(0, *CHUNKER_PARAMS).chunkify(SmallReadFile())) + assert reconstructed == b'a' * 20