From 067be2c5519a3553b4c4bc701f0588e9211b7511 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Mon, 5 Jun 2017 23:15:33 +0200 Subject: [PATCH 1/4] fuse: Add cache for blob sizes Closes: #820 --- src/restic/fuse/dir.go | 12 ++++++++---- src/restic/fuse/file.go | 11 +++++++---- src/restic/fuse/file_test.go | 2 +- src/restic/fuse/snapshot.go | 17 ++++++++++++++++- 4 files changed, 32 insertions(+), 10 deletions(-) diff --git a/src/restic/fuse/dir.go b/src/restic/fuse/dir.go index 49210bb90..439ea16e7 100644 --- a/src/restic/fuse/dir.go +++ b/src/restic/fuse/dir.go @@ -24,9 +24,11 @@ type dir struct { inode uint64 node *restic.Node ownerIsRoot bool + + sizes map[restic.ID]uint } -func newDir(ctx context.Context, repo restic.Repository, node *restic.Node, ownerIsRoot bool) (*dir, error) { +func newDir(ctx context.Context, repo restic.Repository, node *restic.Node, ownerIsRoot bool, sizes map[restic.ID]uint) (*dir, error) { debug.Log("new dir for %v (%v)", node.Name, node.Subtree.Str()) tree, err := repo.LoadTree(ctx, *node.Subtree) if err != nil { @@ -44,6 +46,7 @@ func newDir(ctx context.Context, repo restic.Repository, node *restic.Node, owne items: items, inode: node.Inode, ownerIsRoot: ownerIsRoot, + sizes: sizes, }, nil } @@ -66,7 +69,7 @@ func replaceSpecialNodes(ctx context.Context, repo restic.Repository, node *rest return tree.Nodes, nil } -func newDirFromSnapshot(ctx context.Context, repo restic.Repository, snapshot SnapshotWithId, ownerIsRoot bool) (*dir, error) { +func newDirFromSnapshot(ctx context.Context, repo restic.Repository, snapshot SnapshotWithId, ownerIsRoot bool, sizes map[restic.ID]uint) (*dir, error) { debug.Log("new dir for snapshot %v (%v)", snapshot.ID.Str(), snapshot.Tree.Str()) tree, err := repo.LoadTree(ctx, *snapshot.Tree) if err != nil { @@ -99,6 +102,7 @@ func newDirFromSnapshot(ctx context.Context, repo restic.Repository, snapshot Sn items: items, inode: inodeFromBackendID(snapshot.ID), ownerIsRoot: ownerIsRoot, + sizes: sizes, 
}, nil } @@ -167,9 +171,9 @@ func (d *dir) Lookup(ctx context.Context, name string) (fs.Node, error) { } switch node.Type { case "dir": - return newDir(ctx, d.repo, node, d.ownerIsRoot) + return newDir(ctx, d.repo, node, d.ownerIsRoot, d.sizes) case "file": - return newFile(d.repo, node, d.ownerIsRoot) + return newFile(d.repo, node, d.ownerIsRoot, d.sizes) case "symlink": return newLink(d.repo, node, d.ownerIsRoot) default: diff --git a/src/restic/fuse/file.go b/src/restic/fuse/file.go index ae3ba5a7d..679c5e010 100644 --- a/src/restic/fuse/file.go +++ b/src/restic/fuse/file.go @@ -41,14 +41,17 @@ type file struct { const defaultBlobSize = 128 * 1024 -func newFile(repo BlobLoader, node *restic.Node, ownerIsRoot bool) (*file, error) { +func newFile(repo BlobLoader, node *restic.Node, ownerIsRoot bool, sizecache map[restic.ID]uint) (fusefile *file, err error) { debug.Log("create new file for %v with %d blobs", node.Name, len(node.Content)) var bytes uint64 sizes := make([]int, len(node.Content)) for i, id := range node.Content { - size, err := repo.LookupBlobSize(id, restic.DataBlob) - if err != nil { - return nil, err + size, ok := sizecache[id] + if !ok { + size, err = repo.LookupBlobSize(id, restic.DataBlob) + if err != nil { + return nil, err + } } sizes[i] = int(size) diff --git a/src/restic/fuse/file_test.go b/src/restic/fuse/file_test.go index dcb959fec..87aaa052d 100644 --- a/src/restic/fuse/file_test.go +++ b/src/restic/fuse/file_test.go @@ -108,7 +108,7 @@ func TestFuseFile(t *testing.T) { Size: filesize, Content: content, } - f, err := newFile(repo, node, false) + f, err := newFile(repo, node, false, make(map[restic.ID]uint)) OK(t, err) attr := fuse.Attr{} diff --git a/src/restic/fuse/snapshot.go b/src/restic/fuse/snapshot.go index 4057301f8..2aa34e341 100644 --- a/src/restic/fuse/snapshot.go +++ b/src/restic/fuse/snapshot.go @@ -14,6 +14,7 @@ import ( "restic" "restic/debug" + "restic/repository" "golang.org/x/net/context" ) @@ -36,12 +37,25 @@ type 
SnapshotsDir struct { tags []string host string + // sizes caches the sizes of all blobs. + sizes map[restic.ID]uint + // knownSnapshots maps snapshot timestamp to the snapshot sync.RWMutex knownSnapshots map[string]SnapshotWithId processed restic.IDSet } +func sizeCache(midx *repository.MasterIndex) map[restic.ID]uint { + c := make(map[restic.ID]uint, 1000) + for _, idx := range midx.All() { + for pb := range idx.Each(nil) { + c[pb.ID] = pb.Length + } + } + return c +} + // NewSnapshotsDir returns a new dir object for the snapshots. func NewSnapshotsDir(repo restic.Repository, ownerIsRoot bool, paths []string, tags []string, host string) *SnapshotsDir { debug.Log("fuse mount initiated") @@ -53,6 +67,7 @@ func NewSnapshotsDir(repo restic.Repository, ownerIsRoot bool, paths []string, t host: host, knownSnapshots: make(map[string]SnapshotWithId), processed: restic.NewIDSet(), + sizes: sizeCache(repo.Index().(*repository.MasterIndex)), } } @@ -158,5 +173,5 @@ func (sn *SnapshotsDir) Lookup(ctx context.Context, name string) (fs.Node, error } } - return newDirFromSnapshot(ctx, sn.repo, snapshot, sn.ownerIsRoot) + return newDirFromSnapshot(ctx, sn.repo, snapshot, sn.ownerIsRoot, sn.sizes) } From 233eaf8ee9e354ef7e1b56b6b4428cb9d8e74b27 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Wed, 7 Jun 2017 20:01:31 +0200 Subject: [PATCH 2/4] fuse: Improve semantics of the blob size cache Wrap it in a struct and add a Lookup() function to make clear that it is only queried, not changed, so we don't have any race conditions. 
--- src/restic/fuse/dir.go | 14 +++++------ src/restic/fuse/file.go | 4 ++-- src/restic/fuse/file_test.go | 2 +- src/restic/fuse/snapshot.go | 45 +++++++++++++++++++++++++----------- 4 files changed, 41 insertions(+), 24 deletions(-) diff --git a/src/restic/fuse/dir.go b/src/restic/fuse/dir.go index 439ea16e7..f7920f6ca 100644 --- a/src/restic/fuse/dir.go +++ b/src/restic/fuse/dir.go @@ -25,10 +25,10 @@ type dir struct { node *restic.Node ownerIsRoot bool - sizes map[restic.ID]uint + blobsize *BlobSizeCache } -func newDir(ctx context.Context, repo restic.Repository, node *restic.Node, ownerIsRoot bool, sizes map[restic.ID]uint) (*dir, error) { +func newDir(ctx context.Context, repo restic.Repository, node *restic.Node, ownerIsRoot bool, blobsize *BlobSizeCache) (*dir, error) { debug.Log("new dir for %v (%v)", node.Name, node.Subtree.Str()) tree, err := repo.LoadTree(ctx, *node.Subtree) if err != nil { @@ -46,7 +46,7 @@ func newDir(ctx context.Context, repo restic.Repository, node *restic.Node, owne items: items, inode: node.Inode, ownerIsRoot: ownerIsRoot, - sizes: sizes, + blobsize: blobsize, }, nil } @@ -69,7 +69,7 @@ func replaceSpecialNodes(ctx context.Context, repo restic.Repository, node *rest return tree.Nodes, nil } -func newDirFromSnapshot(ctx context.Context, repo restic.Repository, snapshot SnapshotWithId, ownerIsRoot bool, sizes map[restic.ID]uint) (*dir, error) { +func newDirFromSnapshot(ctx context.Context, repo restic.Repository, snapshot SnapshotWithId, ownerIsRoot bool, blobsize *BlobSizeCache) (*dir, error) { debug.Log("new dir for snapshot %v (%v)", snapshot.ID.Str(), snapshot.Tree.Str()) tree, err := repo.LoadTree(ctx, *snapshot.Tree) if err != nil { @@ -102,7 +102,7 @@ func newDirFromSnapshot(ctx context.Context, repo restic.Repository, snapshot Sn items: items, inode: inodeFromBackendID(snapshot.ID), ownerIsRoot: ownerIsRoot, - sizes: sizes, + blobsize: blobsize, }, nil } @@ -171,9 +171,9 @@ func (d *dir) Lookup(ctx context.Context, name 
string) (fs.Node, error) { } switch node.Type { case "dir": - return newDir(ctx, d.repo, node, d.ownerIsRoot, d.sizes) + return newDir(ctx, d.repo, node, d.ownerIsRoot, d.blobsize) case "file": - return newFile(d.repo, node, d.ownerIsRoot, d.sizes) + return newFile(d.repo, node, d.ownerIsRoot, d.blobsize) case "symlink": return newLink(d.repo, node, d.ownerIsRoot) default: diff --git a/src/restic/fuse/file.go b/src/restic/fuse/file.go index 679c5e010..cfb799a82 100644 --- a/src/restic/fuse/file.go +++ b/src/restic/fuse/file.go @@ -41,12 +41,12 @@ type file struct { const defaultBlobSize = 128 * 1024 -func newFile(repo BlobLoader, node *restic.Node, ownerIsRoot bool, sizecache map[restic.ID]uint) (fusefile *file, err error) { +func newFile(repo BlobLoader, node *restic.Node, ownerIsRoot bool, blobsize *BlobSizeCache) (fusefile *file, err error) { debug.Log("create new file for %v with %d blobs", node.Name, len(node.Content)) var bytes uint64 sizes := make([]int, len(node.Content)) for i, id := range node.Content { - size, ok := sizecache[id] + size, ok := blobsize.Lookup(id) if !ok { size, err = repo.LookupBlobSize(id, restic.DataBlob) if err != nil { diff --git a/src/restic/fuse/file_test.go b/src/restic/fuse/file_test.go index 87aaa052d..4c4550282 100644 --- a/src/restic/fuse/file_test.go +++ b/src/restic/fuse/file_test.go @@ -108,7 +108,7 @@ func TestFuseFile(t *testing.T) { Size: filesize, Content: content, } - f, err := newFile(repo, node, false, make(map[restic.ID]uint)) + f, err := newFile(repo, node, false, nil) OK(t, err) attr := fuse.Attr{} diff --git a/src/restic/fuse/snapshot.go b/src/restic/fuse/snapshot.go index 2aa34e341..1b22ecd50 100644 --- a/src/restic/fuse/snapshot.go +++ b/src/restic/fuse/snapshot.go @@ -19,6 +19,34 @@ import ( "golang.org/x/net/context" ) +// BlobSizeCache caches the size of blobs in the repo. 
+type BlobSizeCache struct { + m map[restic.ID]uint +} + +// NewBlobSizeCache returns a new blob size cache containing all entries from midx. +func NewBlobSizeCache(midx *repository.MasterIndex) *BlobSizeCache { + m := make(map[restic.ID]uint, 1000) + for _, idx := range midx.All() { + for pb := range idx.Each(nil) { + m[pb.ID] = pb.Length + } + } + return &BlobSizeCache{ + m: m, + } +} + +// Lookup returns the size of the blob id. +func (c *BlobSizeCache) Lookup(id restic.ID) (size uint, found bool) { + if c == nil { + return 0, false + } + + size, found = c.m[id] + return size, found +} + type SnapshotWithId struct { *restic.Snapshot restic.ID @@ -37,8 +65,7 @@ type SnapshotsDir struct { tags []string host string - // sizes caches the sizes of all blobs. - sizes map[restic.ID]uint + blobsize *BlobSizeCache // knownSnapshots maps snapshot timestamp to the snapshot sync.RWMutex @@ -46,16 +73,6 @@ type SnapshotsDir struct { processed restic.IDSet } -func sizeCache(midx *repository.MasterIndex) map[restic.ID]uint { - c := make(map[restic.ID]uint, 1000) - for _, idx := range midx.All() { - for pb := range idx.Each(nil) { - c[pb.ID] = pb.Length - } - } - return c -} - // NewSnapshotsDir returns a new dir object for the snapshots. 
func NewSnapshotsDir(repo restic.Repository, ownerIsRoot bool, paths []string, tags []string, host string) *SnapshotsDir { debug.Log("fuse mount initiated") @@ -67,7 +84,7 @@ func NewSnapshotsDir(repo restic.Repository, ownerIsRoot bool, paths []string, t host: host, knownSnapshots: make(map[string]SnapshotWithId), processed: restic.NewIDSet(), - sizes: sizeCache(repo.Index().(*repository.MasterIndex)), + blobsize: NewBlobSizeCache(repo.Index().(*repository.MasterIndex)), } } @@ -173,5 +190,5 @@ func (sn *SnapshotsDir) Lookup(ctx context.Context, name string) (fs.Node, error } } - return newDirFromSnapshot(ctx, sn.repo, snapshot, sn.ownerIsRoot, sn.sizes) + return newDirFromSnapshot(ctx, sn.repo, snapshot, sn.ownerIsRoot, sn.blobsize) } From f2a51aa37cf2a0f8f6026e110cfcbfe6e73c8511 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Wed, 7 Jun 2017 20:51:08 +0200 Subject: [PATCH 3/4] Add entry to CHANGELOG --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 92274a1be..a748694ee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,10 @@ Important Changes in 0.X.Y https://github.com/restic/restic/issues/989 https://github.com/restic/restic/pull/993 + * Improved performance for the fuse mount: Listing directories which contain + large files is now significantly faster. 
+ https://github.com/restic/restic/pull/998 + Important Changes in 0.6.1 ========================== From a46baf768558d1087ebe789305894dd207ff07e1 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Wed, 7 Jun 2017 20:51:45 +0200 Subject: [PATCH 4/4] s3: Remove cache --- src/restic/backend/s3/s3.go | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/src/restic/backend/s3/s3.go b/src/restic/backend/s3/s3.go index 767c21dc0..dde45b789 100644 --- a/src/restic/backend/s3/s3.go +++ b/src/restic/backend/s3/s3.go @@ -8,7 +8,6 @@ import ( "path" "restic" "strings" - "sync" "time" "restic/backend" @@ -23,12 +22,10 @@ const connLimit = 10 // s3 is a backend which stores the data on an S3 endpoint. type s3 struct { - client *minio.Client - sem *backend.Semaphore - bucketname string - prefix string - cacheMutex sync.RWMutex - cacheObjSize map[string]int64 + client *minio.Client + sem *backend.Semaphore + bucketname string + prefix string backend.Layout } @@ -53,11 +50,10 @@ func Open(cfg Config) (restic.Backend, error) { } be := &s3{ - client: client, - sem: sem, - bucketname: cfg.Bucket, - prefix: cfg.Prefix, - cacheObjSize: make(map[string]int64), + client: client, + sem: sem, + bucketname: cfg.Bucket, + prefix: cfg.Prefix, } client.SetCustomTransport(backend.Transport())