From ce176b52bf6aa096a7f6e702edf1d966c8fd008d Mon Sep 17 00:00:00 2001 From: Oleg Drokin Date: Fri, 15 Apr 2016 19:27:44 -0400 Subject: [PATCH] Workround a bug in Linux fadvise FADV_DONTNEED, fixes #907 Despite what the man page says, Linux does not discard the initial partial page only. The ending page would be truncated no matter if it is partial or not. Page-align the fadvise size to take care of this. Also while we are at it, roll back initial fadvise offset to the previous page boundary to actually throw away that page as we no longer need it having read the second part now and the first time in the previous call. This patch has a noticeable impact in my Linux testing when the file is on the rotating media. The total test runtime decreased by a bit over 10%, but since over half of that time was actually cpu time, the actual iowait time decreased around 20%. --- borg/_chunker.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/borg/_chunker.c b/borg/_chunker.c index 5a1ecfa15..5158009cc 100644 --- a/borg/_chunker.c +++ b/borg/_chunker.c @@ -41,6 +41,7 @@ static uint32_t table_base[] = #define BARREL_SHIFT(v, shift) ( ((v) << shift) | ((v) >> (32 - shift)) ) +size_t pagemask; static uint32_t * buzhash_init_table(uint32_t seed) @@ -130,6 +131,7 @@ chunker_fill(Chunker *c) { ssize_t n; off_t offset, length; + int overshoot; PyObject *data; memmove(c->data, c->data + c->last, c->position + c->remaining - c->last); c->position -= c->last; @@ -157,14 +159,33 @@ chunker_fill(Chunker *c) } length = c->bytes_read - offset; #if ( ( _XOPEN_SOURCE >= 600 || _POSIX_C_SOURCE >= 200112L ) && defined(POSIX_FADV_DONTNEED) ) + + // Only do it once per run. + if (pagemask == 0) + pagemask = getpagesize() - 1; + // We tell the OS that we do not need the data that we just have read any // more (that it maybe has in the cache). This avoids that we spoil the // complete cache with data that we only read once and (due to cache // size limit) kick out data from the cache that might be still useful // for the OS or other processes. + // We rollback the initial offset back to the start of the page, + // to avoid it not being truncated as a partial page request. if (length > 0) { - posix_fadvise(c->fh, offset, length, POSIX_FADV_DONTNEED); - } + // Linux kernels prior to 4.7 have a bug where they truncate + // last partial page of POSIX_FADV_DONTNEED request, so we need + // to page-align it ourselves. We'll need the rest of this page + // on the next read (assuming this was not EOF) + overshoot = (offset + length) & pagemask; + } else { + // For length == 0 we set overshoot 0, so the below + // length - overshoot is 0, which means till end of file for + // fadvise. This will cancel the final page and is not part + // of the above workaround. + overshoot = 0; + } + + posix_fadvise(c->fh, offset & ~pagemask, length - overshoot, POSIX_FADV_DONTNEED); #endif } else {