Merge pull request #2893 from MichaelEischer/restore-preallocate

restorer: Preallocate files
MichaelEischer 2020-09-08 22:43:05 +02:00 committed by GitHub
commit 88664ba222
9 changed files with 133 additions and 11 deletions

View File

@@ -14,4 +14,10 @@ file can be written to the file before any of the preceding file blobs.
 It is therefore possible to have gaps in the data written to the target
 files if restore fails or is interrupted by the user.
 
+The implementation will try to preallocate space for the restored files
+on the filesystem to prevent file fragmentation. This ensures good read
+performance for large files, such as VM images. If preallocating space
+is not supported by the filesystem, then this step is silently skipped.
+
 https://github.com/restic/restic/pull/2195
+https://github.com/restic/restic/pull/2893
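
As a rough illustration of the behaviour this changelog entry describes, the following sketch preallocates a restore target and silently skips the step when the filesystem does not support it. It is a minimal, Linux-only example with made-up names (restoreTarget, the log message), not restic's actual restorer code.

package main

import (
	"log"
	"os"
	"path/filepath"

	"golang.org/x/sys/unix"
)

// restoreTarget creates the target file, tries to reserve its final size up
// front to reduce fragmentation, and keeps going when preallocation is not
// supported.
func restoreTarget(path string, size int64) error {
	f, err := os.OpenFile(path, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0600)
	if err != nil {
		return err
	}
	defer f.Close()

	if size > 0 {
		// mode 0 also extends the file size to the preallocated length
		if err := unix.Fallocate(int(f.Fd()), 0, 0, size); err != nil {
			// e.g. ENOTSUP on filesystems without fallocate support:
			// log it and continue instead of failing the restore
			log.Printf("preallocate skipped for %v: %v", path, err)
		}
	}

	// ... the restorer would now write the file's blobs at their offsets ...
	return nil
}

func main() {
	target := filepath.Join(os.TempDir(), "restore-preallocate-example")
	if err := restoreTarget(target, 1<<20); err != nil {
		log.Fatal(err)
	}
}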

View File

@@ -33,6 +33,7 @@ const (
 type fileInfo struct {
 	lock     sync.Mutex
 	flags    int
+	size     int64
 	location string      // file on local filesystem relative to restorer basedir
 	blobs    interface{} // blobs of the file
 }
@@ -74,8 +75,8 @@ func newFileRestorer(dst string,
 	}
 }
 
-func (r *fileRestorer) addFile(location string, content restic.IDs) {
-	r.files = append(r.files, &fileInfo{location: location, blobs: content})
+func (r *fileRestorer) addFile(location string, content restic.IDs, size int64) {
+	r.files = append(r.files, &fileInfo{location: location, blobs: content, size: size})
 }
 
 func (r *fileRestorer) targetPath(location string) string {
@@ -101,6 +102,10 @@ func (r *fileRestorer) forEachBlob(blobIDs []restic.ID, fn func(packID restic.ID
 func (r *fileRestorer) restoreFiles(ctx context.Context) error {
 
 	packs := make(map[restic.ID]*packInfo) // all packs
+	// Process packs in order of first access. While this cannot guarantee
+	// that file chunks are restored sequentially, it offers a good enough
+	// approximation to shorten restore times by up to 19% in some tests.
+	var packOrder restic.IDs
 
 	// create packInfo from fileInfo
 	for _, file := range r.files {
@@ -123,6 +128,7 @@ func (r *fileRestorer) restoreFiles(ctx context.Context) error {
 				files: make(map[*fileInfo]struct{}),
 			}
 			packs[packID] = pack
+			packOrder = append(packOrder, packID)
 		}
 		pack.files[file] = struct{}{}
 	})
@@ -157,7 +163,8 @@ func (r *fileRestorer) restoreFiles(ctx context.Context) error {
 	}
 
 	// the main restore loop
-	for _, pack := range packs {
+	for _, id := range packOrder {
+		pack := packs[id]
 		select {
 		case <-ctx.Done():
 			return ctx.Err()
@@ -269,13 +276,15 @@ func (r *fileRestorer) downloadPack(ctx context.Context, pack *packInfo) {
 				// write other blobs after releasing the lock
 				file.lock.Lock()
 				create := file.flags&fileProgress == 0
+				createSize := int64(-1)
 				if create {
 					defer file.lock.Unlock()
 					file.flags |= fileProgress
+					createSize = file.size
 				} else {
 					file.lock.Unlock()
 				}
-				return r.filesWriter.writeToFile(r.targetPath(file.location), blobData, offset, create)
+				return r.filesWriter.writeToFile(r.targetPath(file.location), blobData, offset, createSize)
 			}
 			err := writeToFile()
 			if err != nil {
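
The packOrder slice added above exists because ranging over a Go map visits keys in a randomized order: the packs map keeps constant-time lookups, while the slice remembers the order in which packs were first needed so the main restore loop can download them deterministically. A small self-contained sketch of that map-plus-slice pattern (string pack IDs and the files counter are placeholders, not restic's types):

package main

import "fmt"

type packInfo struct{ files int }

func main() {
	packs := make(map[string]*packInfo) // fast lookup by pack ID
	var packOrder []string              // remembers the order of first access

	// simulate files referencing packs; "b" is needed first, then "a", then "c"
	for _, id := range []string{"b", "a", "b", "c", "a"} {
		pack, ok := packs[id]
		if !ok {
			pack = &packInfo{}
			packs[id] = pack
			packOrder = append(packOrder, id) // record each pack only once
		}
		pack.files++
	}

	// iterating over packOrder is deterministic; ranging over packs is not
	for _, id := range packOrder {
		fmt.Printf("pack %s referenced by %d files\n", id, packs[id].files)
	}
}

Ranging over the map directly would visit the packs in a different order on every run, losing the roughly sequential access that the comment in the diff aims for.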

View File

@@ -5,6 +5,7 @@ import (
 	"sync"
 
 	"github.com/cespare/xxhash"
+	"github.com/restic/restic/internal/debug"
 )
 
 // writes blobs to target files.
@@ -33,7 +34,7 @@ func newFilesWriter(count int) *filesWriter {
 	}
 }
 
-func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, create bool) error {
+func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, createSize int64) error {
 	bucket := &w.buckets[uint(xxhash.Sum64String(path))%uint(len(w.buckets))]
 
 	acquireWriter := func() (*os.File, error) {
@@ -46,7 +47,7 @@ func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, create
 		}
 
 		var flags int
-		if create {
+		if createSize >= 0 {
 			flags = os.O_CREATE | os.O_TRUNC | os.O_WRONLY
 		} else {
 			flags = os.O_WRONLY
@@ -60,6 +61,18 @@ func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, create
 		bucket.files[path] = wr
 		bucket.users[path] = 1
 
+		if createSize >= 0 {
+			err := preallocateFile(wr, createSize)
+			if err != nil {
+				// Just log the preallocate error but don't let it cause the restore process to fail.
+				// Preallocate might return an error if the filesystem (implementation) does not
+				// support preallocation or our combination of parameters to the preallocate call.
+				// This should yield a syscall.ENOTSUP error, but some other errors might also
+				// show up.
+				debug.Log("Failed to preallocate %v with size %v: %v", path, createSize, err)
+			}
+		}
+
 		return wr, nil
 	}
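
The createSize parameter folds two pieces of information into one value: a size >= 0 marks the first write to a file (create and truncate it, then try to preallocate that many bytes), while -1 means the file is already being restored and only needs to be opened for writing. The sketch below shows that convention under simplifying assumptions: writeAt and its Truncate-based preallocate helper are made up for illustration, and the file is reopened on every call instead of being cached per bucket the way filesWriter does.

package main

import (
	"log"
	"os"
	"path/filepath"
)

// preallocate is a portable stand-in for the platform-specific
// preallocateFile implementations added by this pull request; Truncate at
// least sets the final size (and reserves space on Windows).
func preallocate(f *os.File, size int64) error {
	return f.Truncate(size)
}

// writeAt mirrors the createSize convention of filesWriter.writeToFile:
// createSize >= 0 is the first write (create, truncate, preallocate),
// createSize == -1 writes into an already created file.
func writeAt(path string, blob []byte, offset int64, createSize int64) error {
	flags := os.O_WRONLY
	if createSize >= 0 {
		flags = os.O_CREATE | os.O_TRUNC | os.O_WRONLY
	}

	f, err := os.OpenFile(path, flags, 0600)
	if err != nil {
		return err
	}
	defer f.Close()

	if createSize >= 0 {
		// best effort: a failed preallocation must not abort the restore
		if err := preallocate(f, createSize); err != nil {
			log.Printf("preallocate failed for %v: %v", path, err)
		}
	}

	_, err = f.WriteAt(blob, offset)
	return err
}

func main() {
	target := filepath.Join(os.TempDir(), "writeat-example")
	// the first blob creates the file and preallocates 4 bytes
	if err := writeAt(target, []byte{1, 2}, 0, 4); err != nil {
		log.Fatal(err)
	}
	// later blobs only open the existing file and write at their offset
	if err := writeAt(target, []byte{3, 4}, 2, -1); err != nil {
		log.Fatal(err)
	}
}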

View File

@@ -16,19 +16,19 @@ func TestFilesWriterBasic(t *testing.T) {
 	f1 := dir + "/f1"
 	f2 := dir + "/f2"
 
-	rtest.OK(t, w.writeToFile(f1, []byte{1}, 0, true))
+	rtest.OK(t, w.writeToFile(f1, []byte{1}, 0, 2))
 	rtest.Equals(t, 0, len(w.buckets[0].files))
 	rtest.Equals(t, 0, len(w.buckets[0].users))
 
-	rtest.OK(t, w.writeToFile(f2, []byte{2}, 0, true))
+	rtest.OK(t, w.writeToFile(f2, []byte{2}, 0, 2))
 	rtest.Equals(t, 0, len(w.buckets[0].files))
 	rtest.Equals(t, 0, len(w.buckets[0].users))
 
-	rtest.OK(t, w.writeToFile(f1, []byte{1}, 1, false))
+	rtest.OK(t, w.writeToFile(f1, []byte{1}, 1, -1))
 	rtest.Equals(t, 0, len(w.buckets[0].files))
 	rtest.Equals(t, 0, len(w.buckets[0].users))
 
-	rtest.OK(t, w.writeToFile(f2, []byte{2}, 1, false))
+	rtest.OK(t, w.writeToFile(f2, []byte{2}, 1, -1))
 	rtest.Equals(t, 0, len(w.buckets[0].files))
 	rtest.Equals(t, 0, len(w.buckets[0].users))

View File

@@ -0,0 +1,33 @@
+package restorer
+
+import (
+	"os"
+	"runtime"
+	"unsafe"
+
+	"golang.org/x/sys/unix"
+)
+
+func preallocateFile(wr *os.File, size int64) error {
+	// try contiguous first
+	fst := unix.Fstore_t{
+		Flags:   unix.F_ALLOCATECONTIG | unix.F_ALLOCATEALL,
+		Posmode: unix.F_PEOFPOSMODE,
+		Offset:  0,
+		Length:  size,
+	}
+	_, err := unix.FcntlInt(wr.Fd(), unix.F_PREALLOCATE, int(uintptr(unsafe.Pointer(&fst))))
+	if err == nil {
+		return nil
+	}
+
+	// just take preallocation in any form, but still ask for everything
+	fst.Flags = unix.F_ALLOCATEALL
+	_, err = unix.FcntlInt(wr.Fd(), unix.F_PREALLOCATE, int(uintptr(unsafe.Pointer(&fst))))
+
+	// Keep struct alive until fcntl has returned
+	runtime.KeepAlive(fst)
+
+	return err
+}

View File

@@ -0,0 +1,16 @@
+package restorer
+
+import (
+	"os"
+
+	"golang.org/x/sys/unix"
+)
+
+func preallocateFile(wr *os.File, size int64) error {
+	if size <= 0 {
+		return nil
+	}
+	// int fallocate(int fd, int mode, off_t offset, off_t len)
+	// use mode = 0 to also change the file size
+	return unix.Fallocate(int(wr.Fd()), 0, 0, size)
+}

View File

@@ -0,0 +1,11 @@
+// +build !linux,!darwin
+
+package restorer
+
+import "os"
+
+func preallocateFile(wr *os.File, size int64) error {
+	// Maybe truncate can help?
+	// Windows: This calls SetEndOfFile which preallocates space on disk
+	return wr.Truncate(size)
+}

View File

@@ -0,0 +1,34 @@
+package restorer
+
+import (
+	"os"
+	"path"
+	"strconv"
+	"testing"
+
+	"github.com/restic/restic/internal/fs"
+	"github.com/restic/restic/internal/test"
+)
+
+func TestPreallocate(t *testing.T) {
+	for _, i := range []int64{0, 1, 4096, 1024 * 1024} {
+		t.Run(strconv.FormatInt(i, 10), func(t *testing.T) {
+			dirpath, cleanup := test.TempDir(t)
+			defer cleanup()
+
+			flags := os.O_CREATE | os.O_TRUNC | os.O_WRONLY
+			wr, err := os.OpenFile(path.Join(dirpath, "test"), flags, 0600)
+			test.OK(t, err)
+			defer wr.Close()
+
+			err = preallocateFile(wr, i)
+			test.OK(t, err)
+
+			fi, err := wr.Stat()
+			test.OK(t, err)
+
+			efi := fs.ExtendedStat(fi)
+			test.Assert(t, efi.Size == i || efi.Blocks > 0, "Preallocated size of %v, got size %v block %v", i, efi.Size, efi.Blocks)
+		})
+	}
+}

View File

@@ -238,7 +238,7 @@ func (res *Restorer) RestoreTo(ctx context.Context, dst string) error {
 				idx.Add(node.Inode, node.DeviceID, location)
 			}
 
-			filerestorer.addFile(location, node.Content)
+			filerestorer.addFile(location, node.Content, int64(node.Size))
 
 			return nil
 		},