From fe04d024c7eb598a4804cb66793f4529c2181087 Mon Sep 17 00:00:00 2001 From: greatroar <61184462+greatroar@users.noreply.github.com> Date: Fri, 24 Sep 2021 15:38:23 +0200 Subject: [PATCH] Use LRU cache in restic dump --- cmd/restic/cmd_dump.go | 3 ++- internal/bloblru/cache.go | 13 +++++++++++-- internal/dump/common.go | 27 ++++++++++++++++++++------- internal/dump/common_test.go | 2 -- internal/dump/tar.go | 18 +++++++++++------- internal/dump/zip.go | 18 +++++++++++------- 6 files changed, 55 insertions(+), 26 deletions(-) diff --git a/cmd/restic/cmd_dump.go b/cmd/restic/cmd_dump.go index 667dc3875..4c8ed5b1d 100644 --- a/cmd/restic/cmd_dump.go +++ b/cmd/restic/cmd_dump.go @@ -95,7 +95,8 @@ func printFromTree(ctx context.Context, tree *restic.Tree, repo restic.Repositor if node.Name == pathComponents[0] { switch { case l == 1 && dump.IsFile(node): - return dump.GetNodeData(ctx, os.Stdout, repo, node) + cache := dump.NewCache() + return dump.WriteNodeData(ctx, os.Stdout, repo, node, cache) case l > 1 && dump.IsDir(node): subtree, err := repo.LoadTree(ctx, *node.Subtree) if err != nil { diff --git a/internal/bloblru/cache.go b/internal/bloblru/cache.go index 473a4c2e6..dc977e650 100644 --- a/internal/bloblru/cache.go +++ b/internal/bloblru/cache.go @@ -42,7 +42,9 @@ func New(size int) *Cache { return c } -func (c *Cache) Add(id restic.ID, blob []byte) { +// Add adds key id with value blob to c. +// It may return an evicted buffer for reuse. +func (c *Cache) Add(id restic.ID, blob []byte) (old []byte) { debug.Log("bloblru.Cache: add %v", id) size := len(blob) + overhead @@ -62,11 +64,18 @@ func (c *Cache) Add(id restic.ID, blob []byte) { // This loop takes at most min(maxEntries, maxchunksize/overhead) // iterations. for size > c.free { - c.c.RemoveOldest() + _, val, _ := c.c.RemoveOldest() + b := val.([]byte) + if len(b) > len(old) { + // We can only return one buffer, so pick the largest. + old = b + } } c.c.Add(key, blob) c.free -= size + + return old } func (c *Cache) Get(id restic.ID) ([]byte, bool) { diff --git a/internal/dump/common.go b/internal/dump/common.go index 39172e930..7ef0c93e4 100644 --- a/internal/dump/common.go +++ b/internal/dump/common.go @@ -5,6 +5,7 @@ import ( "io" "path" + "github.com/restic/restic/internal/bloblru" "github.com/restic/restic/internal/errors" "github.com/restic/restic/internal/restic" "github.com/restic/restic/internal/walker" @@ -16,6 +17,14 @@ type dumper interface { dumpNode(ctx context.Context, node *restic.Node, repo restic.Repository) error } +// WriteDump will write the contents of the given tree to the given destination. +// It will loop over all nodes in the tree and dump them recursively. +type WriteDump func(ctx context.Context, repo restic.Repository, tree *restic.Tree, rootPath string, dst io.Writer) error + +func NewCache() *bloblru.Cache { + return bloblru.New(64 << 20) +} + func writeDump(ctx context.Context, repo restic.Repository, tree *restic.Tree, rootPath string, dmp dumper) error { for _, rootNode := range tree.Nodes { rootNode.Path = rootPath @@ -67,20 +76,24 @@ func dumpTree(ctx context.Context, repo restic.Repository, rootNode *restic.Node return err } -// GetNodeData will write the contents of the node to the given output. -func GetNodeData(ctx context.Context, output io.Writer, repo restic.Repository, node *restic.Node) error { +// WriteNodeData writes the contents of the node to the given Writer. +func WriteNodeData(ctx context.Context, w io.Writer, repo restic.Repository, node *restic.Node, cache *bloblru.Cache) error { var ( buf []byte err error ) for _, id := range node.Content { - buf, err = repo.LoadBlob(ctx, restic.DataBlob, id, buf) - if err != nil { - return err + blob, ok := cache.Get(id) + if !ok { + blob, err = repo.LoadBlob(ctx, restic.DataBlob, id, buf) + if err != nil { + return err + } + + buf = cache.Add(id, blob) // Reuse evicted buffer. } - _, err = output.Write(buf) - if err != nil { + if _, err := w.Write(blob); err != nil { return errors.Wrap(err, "Write") } } diff --git a/internal/dump/common_test.go b/internal/dump/common_test.go index f692007be..e15659701 100644 --- a/internal/dump/common_test.go +++ b/internal/dump/common_test.go @@ -3,7 +3,6 @@ package dump import ( "bytes" "context" - "io" "testing" "github.com/restic/restic/internal/archiver" @@ -28,7 +27,6 @@ func prepareTempdirRepoSrc(t testing.TB, src archiver.TestDir) (tempdir string, } type CheckDump func(t *testing.T, testDir string, testDump *bytes.Buffer) error -type WriteDump func(ctx context.Context, repo restic.Repository, tree *restic.Tree, rootPath string, dst io.Writer) error func WriteTest(t *testing.T, wd WriteDump, cd CheckDump) { tests := []struct { diff --git a/internal/dump/tar.go b/internal/dump/tar.go index b9a9a4633..57225cf66 100644 --- a/internal/dump/tar.go +++ b/internal/dump/tar.go @@ -8,25 +8,29 @@ import ( "path/filepath" "strings" + "github.com/restic/restic/internal/bloblru" "github.com/restic/restic/internal/errors" "github.com/restic/restic/internal/restic" ) type tarDumper struct { - w *tar.Writer + cache *bloblru.Cache + w *tar.Writer } // Statically ensure that tarDumper implements dumper. -var _ dumper = tarDumper{} +var _ dumper = &tarDumper{} // WriteTar will write the contents of the given tree, encoded as a tar to the given destination. func WriteTar(ctx context.Context, repo restic.Repository, tree *restic.Tree, rootPath string, dst io.Writer) error { - dmp := tarDumper{w: tar.NewWriter(dst)} - + dmp := &tarDumper{ + cache: NewCache(), + w: tar.NewWriter(dst), + } return writeDump(ctx, repo, tree, rootPath, dmp) } -func (dmp tarDumper) Close() error { +func (dmp *tarDumper) Close() error { return dmp.w.Close() } @@ -39,7 +43,7 @@ const ( cISVTX = 0o1000 // Save text (sticky bit) ) -func (dmp tarDumper) dumpNode(ctx context.Context, node *restic.Node, repo restic.Repository) error { +func (dmp *tarDumper) dumpNode(ctx context.Context, node *restic.Node, repo restic.Repository) error { relPath, err := filepath.Rel("/", node.Path) if err != nil { return err @@ -90,7 +94,7 @@ func (dmp tarDumper) dumpNode(ctx context.Context, node *restic.Node, repo resti return errors.Wrap(err, "TarHeader") } - return GetNodeData(ctx, dmp.w, repo, node) + return WriteNodeData(ctx, dmp.w, repo, node, dmp.cache) } func parseXattrs(xattrs []restic.ExtendedAttribute) map[string]string { diff --git a/internal/dump/zip.go b/internal/dump/zip.go index 69bf0a876..96e2c95b9 100644 --- a/internal/dump/zip.go +++ b/internal/dump/zip.go @@ -6,29 +6,33 @@ import ( "io" "path/filepath" + "github.com/restic/restic/internal/bloblru" "github.com/restic/restic/internal/errors" "github.com/restic/restic/internal/restic" ) type zipDumper struct { - w *zip.Writer + cache *bloblru.Cache + w *zip.Writer } // Statically ensure that zipDumper implements dumper. -var _ dumper = zipDumper{} +var _ dumper = &zipDumper{} // WriteZip will write the contents of the given tree, encoded as a zip to the given destination. func WriteZip(ctx context.Context, repo restic.Repository, tree *restic.Tree, rootPath string, dst io.Writer) error { - dmp := zipDumper{w: zip.NewWriter(dst)} - + dmp := &zipDumper{ + cache: NewCache(), + w: zip.NewWriter(dst), + } return writeDump(ctx, repo, tree, rootPath, dmp) } -func (dmp zipDumper) Close() error { +func (dmp *zipDumper) Close() error { return dmp.w.Close() } -func (dmp zipDumper) dumpNode(ctx context.Context, node *restic.Node, repo restic.Repository) error { +func (dmp *zipDumper) dumpNode(ctx context.Context, node *restic.Node, repo restic.Repository) error { relPath, err := filepath.Rel("/", node.Path) if err != nil { return err @@ -58,5 +62,5 @@ func (dmp zipDumper) dumpNode(ctx context.Context, node *restic.Node, repo resti return nil } - return GetNodeData(ctx, w, repo, node) + return WriteNodeData(ctx, w, repo, node, dmp.cache) }