diff --git a/cmd/restic/cmd_backup.go b/cmd/restic/cmd_backup.go index c8b9154fb..0c254738a 100644 --- a/cmd/restic/cmd_backup.go +++ b/cmd/restic/cmd_backup.go @@ -387,6 +387,16 @@ func runBackup(opts BackupOptions, gopts GlobalOptions, args []string) error { return err } + // exclude restic cache + if repo.Cache != nil { + f, err := rejectResticCache(repo) + if err != nil { + return err + } + + rejectFuncs = append(rejectFuncs, f) + } + err = repo.LoadIndex(context.TODO()) if err != nil { return err diff --git a/cmd/restic/cmd_check.go b/cmd/restic/cmd_check.go index b64429a0e..1a12585fd 100644 --- a/cmd/restic/cmd_check.go +++ b/cmd/restic/cmd_check.go @@ -19,6 +19,9 @@ var cmdCheck = &cobra.Command{ Long: ` The "check" command tests the repository for errors and reports any errors it finds. It can also be used to read all data and therefore simulate a restore. + +By default, the "check" command will always load all data directly from the +repository and not use a local cache. `, DisableAutoGenTag: true, RunE: func(cmd *cobra.Command, args []string) error { @@ -30,6 +33,7 @@ finds. It can also be used to read all data and therefore simulate a restore. 
type CheckOptions struct { ReadData bool CheckUnused bool + WithCache bool } var checkOptions CheckOptions @@ -40,6 +44,7 @@ func init() { f := cmdCheck.Flags() f.BoolVar(&checkOptions.ReadData, "read-data", false, "read all data blobs") f.BoolVar(&checkOptions.CheckUnused, "check-unused", false, "find unused blobs") + f.BoolVar(&checkOptions.WithCache, "with-cache", false, "use the cache") } func newReadProgress(gopts GlobalOptions, todo restic.Stat) *restic.Progress { @@ -77,6 +82,11 @@ func runCheck(opts CheckOptions, gopts GlobalOptions, args []string) error { return errors.Fatal("check has no arguments") } + if !opts.WithCache { + // do not use a cache for the checker + gopts.NoCache = true + } + repo, err := OpenRepository(gopts) if err != nil { return err diff --git a/cmd/restic/cmd_prune.go b/cmd/restic/cmd_prune.go index d48d00192..1383d15a4 100644 --- a/cmd/restic/cmd_prune.go +++ b/cmd/restic/cmd_prune.go @@ -85,6 +85,25 @@ func runPrune(gopts GlobalOptions) error { return pruneRepository(gopts, repo) } +func mixedBlobs(list []restic.Blob) bool { + var tree, data bool + + for _, pb := range list { + switch pb.Type { + case restic.TreeBlob: + tree = true + case restic.DataBlob: + data = true + } + + if tree && data { + return true + } + } + + return false +} + func pruneRepository(gopts GlobalOptions, repo restic.Repository) error { ctx := gopts.ctx @@ -191,6 +210,11 @@ func pruneRepository(gopts GlobalOptions, repo restic.Repository) error { // find packs that need a rewrite rewritePacks := restic.NewIDSet() for _, pack := range idx.Packs { + if mixedBlobs(pack.Entries) { + rewritePacks.Insert(pack.ID) + continue + } + for _, blob := range pack.Entries { h := restic.BlobHandle{ID: blob.ID, Type: blob.Type} if !usedBlobs.Has(h) { diff --git a/cmd/restic/exclude.go b/cmd/restic/exclude.go index 369c4df9a..bb14bfafb 100644 --- a/cmd/restic/exclude.go +++ b/cmd/restic/exclude.go @@ -12,6 +12,7 @@ import ( "github.com/restic/restic/internal/errors" 
"github.com/restic/restic/internal/filter" "github.com/restic/restic/internal/fs" + "github.com/restic/restic/internal/repository" ) // RejectFunc is a function that takes a filename and os.FileInfo of a @@ -177,3 +178,27 @@ func rejectByDevice(samples []string) (RejectFunc, error) { panic(fmt.Sprintf("item %v, device id %v not found, allowedDevs: %v", item, id, allowed)) }, nil } + +// rejectResticCache returns a RejectFunc that rejects the restic cache +// directory (if set). +func rejectResticCache(repo *repository.Repository) (RejectFunc, error) { + if repo.Cache == nil { + return func(string, os.FileInfo) bool { + return false + }, nil + } + cacheBase := repo.Cache.BaseDir() + + if cacheBase == "" { + return nil, errors.New("cacheBase is empty string") + } + + return func(item string, _ os.FileInfo) bool { + if fs.HasPathPrefix(cacheBase, item) { + debug.Log("rejecting restic cache directory %v", item) + return true + } + + return false + }, nil +} diff --git a/cmd/restic/global.go b/cmd/restic/global.go index 3b9f2853a..217d6f732 100644 --- a/cmd/restic/global.go +++ b/cmd/restic/global.go @@ -19,6 +19,7 @@ import ( "github.com/restic/restic/internal/backend/s3" "github.com/restic/restic/internal/backend/sftp" "github.com/restic/restic/internal/backend/swift" + "github.com/restic/restic/internal/cache" "github.com/restic/restic/internal/debug" "github.com/restic/restic/internal/options" "github.com/restic/restic/internal/repository" @@ -38,6 +39,8 @@ type GlobalOptions struct { Quiet bool NoLock bool JSON bool + CacheDir string + NoCache bool ctx context.Context password string @@ -68,7 +71,8 @@ func init() { f.BoolVarP(&globalOptions.Quiet, "quiet", "q", false, "do not output comprehensive progress report") f.BoolVar(&globalOptions.NoLock, "no-lock", false, "do not lock the repo, this allows some operations on read-only repos") f.BoolVarP(&globalOptions.JSON, "json", "", false, "set output mode to JSON for commands that support it") - + 
f.StringVar(&globalOptions.CacheDir, "cache-dir", "", "set the cache directory") + f.BoolVar(&globalOptions.NoCache, "no-cache", false, "do not use a local cache") f.StringSliceVarP(&globalOptions.Options, "option", "o", []string{}, "set extended option (`key=value`, can be specified multiple times)") restoreTerminal() @@ -322,6 +326,17 @@ func OpenRepository(opts GlobalOptions) (*repository.Repository, error) { return nil, err } + if opts.NoCache { + return s, nil + } + + cache, err := cache.New(s.Config().ID, opts.CacheDir) + if err != nil { + Warnf("unable to open cache: %v\n", err) + } else { + s.UseCache(cache) + } + return s, nil } diff --git a/cmd/restic/integration_helpers_test.go b/cmd/restic/integration_helpers_test.go index 3d0ad51ba..305929b6a 100644 --- a/cmd/restic/integration_helpers_test.go +++ b/cmd/restic/integration_helpers_test.go @@ -199,6 +199,7 @@ func withTestEnvironment(t testing.TB) (env *testEnvironment, cleanup func()) { env.gopts = GlobalOptions{ Repo: env.repo, Quiet: true, + CacheDir: env.cache, ctx: context.Background(), password: TestPassword, stdout: os.Stdout, diff --git a/doc/cache.rst b/doc/cache.rst new file mode 100644 index 000000000..b9dbf2797 --- /dev/null +++ b/doc/cache.rst @@ -0,0 +1,26 @@ +Local Cache +=========== + +In order to speed up certain operations, restic manages a local cache of data. +This document describes the data structures for the local cache with version 1. + +Versions +-------- + +The cache directory is selected according to the `XDG base dir specification +<http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html>`__. +Each repository has its own cache sub-directory, consisting of the repository ID +which is chosen at ``init``. All cache directories for different repos are +independent of each other. + +The cache dir for a repo contains a file named ``version``, which contains a +single ASCII integer line that stands for the current version of the cache. If +a lower version number is found the cache is recreated with the current +version. 
If a higher version number is found the cache is ignored and left as +is. + +Snapshots and Indexes +--------------------- + +Snapshot, Data and Index files are cached in the sub-directories ``snapshots``, +``data`` and ``index``, as read from the repository. diff --git a/doc/man/restic-autocomplete.1 b/doc/man/restic-autocomplete.1 index aeb3f4e72..f4450189e 100644 --- a/doc/man/restic-autocomplete.1 +++ b/doc/man/restic-autocomplete.1 @@ -40,10 +40,18 @@ $ sudo restic autocomplete .SH OPTIONS INHERITED FROM PARENT COMMANDS +.PP +\fB\-\-cache\-dir\fP="" + set the cache directory + .PP \fB\-\-json\fP[=false] set output mode to JSON for commands that support it +.PP +\fB\-\-no\-cache\fP[=false] + do not use a local cache + .PP \fB\-\-no\-lock\fP[=false] do not lock the repo, this allows some operations on read\-only repos diff --git a/doc/man/restic-backup.1 b/doc/man/restic-backup.1 index 742adc928..fc3400645 100644 --- a/doc/man/restic-backup.1 +++ b/doc/man/restic-backup.1 @@ -78,10 +78,18 @@ given as the arguments. .SH OPTIONS INHERITED FROM PARENT COMMANDS +.PP +\fB\-\-cache\-dir\fP="" + set the cache directory + .PP \fB\-\-json\fP[=false] set output mode to JSON for commands that support it +.PP +\fB\-\-no\-cache\fP[=false] + do not use a local cache + .PP \fB\-\-no\-lock\fP[=false] do not lock the repo, this allows some operations on read\-only repos diff --git a/doc/man/restic-cat.1 b/doc/man/restic-cat.1 index 765943ad3..156cb8d32 100644 --- a/doc/man/restic-cat.1 +++ b/doc/man/restic-cat.1 @@ -25,10 +25,18 @@ The "cat" command is used to print internal objects to stdout. 
.SH OPTIONS INHERITED FROM PARENT COMMANDS +.PP +\fB\-\-cache\-dir\fP="" + set the cache directory + .PP \fB\-\-json\fP[=false] set output mode to JSON for commands that support it +.PP +\fB\-\-no\-cache\fP[=false] + do not use a local cache + .PP \fB\-\-no\-lock\fP[=false] do not lock the repo, this allows some operations on read\-only repos diff --git a/doc/man/restic-check.1 b/doc/man/restic-check.1 index 22493f8c6..2b6a3cb11 100644 --- a/doc/man/restic-check.1 +++ b/doc/man/restic-check.1 @@ -18,6 +18,10 @@ restic\-check \- Check the repository for errors The "check" command tests the repository for errors and reports any errors it finds. It can also be used to read all data and therefore simulate a restore. +.PP +By default, the "check" command will always load all data directly from the +repository and not use a local cache. + .SH OPTIONS .PP @@ -32,12 +36,24 @@ finds. It can also be used to read all data and therefore simulate a restore. \fB\-\-read\-data\fP[=false] read all data blobs +.PP +\fB\-\-with\-cache\fP[=false] + use the cache + .SH OPTIONS INHERITED FROM PARENT COMMANDS +.PP +\fB\-\-cache\-dir\fP="" + set the cache directory + .PP \fB\-\-json\fP[=false] set output mode to JSON for commands that support it +.PP +\fB\-\-no\-cache\fP[=false] + do not use a local cache + .PP \fB\-\-no\-lock\fP[=false] do not lock the repo, this allows some operations on read\-only repos diff --git a/doc/man/restic-dump.1 b/doc/man/restic-dump.1 index 32d5e4ce8..afab6f159 100644 --- a/doc/man/restic-dump.1 +++ b/doc/man/restic-dump.1 @@ -26,10 +26,18 @@ is used for debugging purposes only. 
.SH OPTIONS INHERITED FROM PARENT COMMANDS +.PP +\fB\-\-cache\-dir\fP="" + set the cache directory + .PP \fB\-\-json\fP[=false] set output mode to JSON for commands that support it +.PP +\fB\-\-no\-cache\fP[=false] + do not use a local cache + .PP \fB\-\-no\-lock\fP[=false] do not lock the repo, this allows some operations on read\-only repos diff --git a/doc/man/restic-find.1 b/doc/man/restic-find.1 index 89d31242d..1c0ac962d 100644 --- a/doc/man/restic-find.1 +++ b/doc/man/restic-find.1 @@ -58,10 +58,18 @@ repo. .SH OPTIONS INHERITED FROM PARENT COMMANDS +.PP +\fB\-\-cache\-dir\fP="" + set the cache directory + .PP \fB\-\-json\fP[=false] set output mode to JSON for commands that support it +.PP +\fB\-\-no\-cache\fP[=false] + do not use a local cache + .PP \fB\-\-no\-lock\fP[=false] do not lock the repo, this allows some operations on read\-only repos diff --git a/doc/man/restic-forget.1 b/doc/man/restic-forget.1 index 0a18e93f5..06ac56dcb 100644 --- a/doc/man/restic-forget.1 +++ b/doc/man/restic-forget.1 @@ -88,10 +88,18 @@ data after 'forget' was run successfully, see the 'prune' command. .SH OPTIONS INHERITED FROM PARENT COMMANDS +.PP +\fB\-\-cache\-dir\fP="" + set the cache directory + .PP \fB\-\-json\fP[=false] set output mode to JSON for commands that support it +.PP +\fB\-\-no\-cache\fP[=false] + do not use a local cache + .PP \fB\-\-no\-lock\fP[=false] do not lock the repo, this allows some operations on read\-only repos diff --git a/doc/man/restic-init.1 b/doc/man/restic-init.1 index f7d0be5af..ece3db2bc 100644 --- a/doc/man/restic-init.1 +++ b/doc/man/restic-init.1 @@ -25,10 +25,18 @@ The "init" command initializes a new repository. 
.SH OPTIONS INHERITED FROM PARENT COMMANDS +.PP +\fB\-\-cache\-dir\fP="" + set the cache directory + .PP \fB\-\-json\fP[=false] set output mode to JSON for commands that support it +.PP +\fB\-\-no\-cache\fP[=false] + do not use a local cache + .PP \fB\-\-no\-lock\fP[=false] do not lock the repo, this allows some operations on read\-only repos diff --git a/doc/man/restic-key.1 b/doc/man/restic-key.1 index bc0664452..81f4abf04 100644 --- a/doc/man/restic-key.1 +++ b/doc/man/restic-key.1 @@ -25,10 +25,18 @@ The "key" command manages keys (passwords) for accessing the repository. .SH OPTIONS INHERITED FROM PARENT COMMANDS +.PP +\fB\-\-cache\-dir\fP="" + set the cache directory + .PP \fB\-\-json\fP[=false] set output mode to JSON for commands that support it +.PP +\fB\-\-no\-cache\fP[=false] + do not use a local cache + .PP \fB\-\-no\-lock\fP[=false] do not lock the repo, this allows some operations on read\-only repos diff --git a/doc/man/restic-list.1 b/doc/man/restic-list.1 index ccc1e8b0f..92793f6c7 100644 --- a/doc/man/restic-list.1 +++ b/doc/man/restic-list.1 @@ -25,10 +25,18 @@ The "list" command allows listing objects in the repository based on type. 
.SH OPTIONS INHERITED FROM PARENT COMMANDS +.PP +\fB\-\-cache\-dir\fP="" + set the cache directory + .PP \fB\-\-json\fP[=false] set output mode to JSON for commands that support it +.PP +\fB\-\-no\-cache\fP[=false] + do not use a local cache + .PP \fB\-\-no\-lock\fP[=false] do not lock the repo, this allows some operations on read\-only repos diff --git a/doc/man/restic-ls.1 b/doc/man/restic-ls.1 index 8f81194c0..e0dae4e21 100644 --- a/doc/man/restic-ls.1 +++ b/doc/man/restic-ls.1 @@ -44,10 +44,18 @@ The special snapshot\-ID "latest" can be used to list files and directories of t .SH OPTIONS INHERITED FROM PARENT COMMANDS +.PP +\fB\-\-cache\-dir\fP="" + set the cache directory + .PP \fB\-\-json\fP[=false] set output mode to JSON for commands that support it +.PP +\fB\-\-no\-cache\fP[=false] + do not use a local cache + .PP \fB\-\-no\-lock\fP[=false] do not lock the repo, this allows some operations on read\-only repos diff --git a/doc/man/restic-manpage.1 b/doc/man/restic-manpage.1 index d8d1bcc30..6f9d5f898 100644 --- a/doc/man/restic-manpage.1 +++ b/doc/man/restic-manpage.1 @@ -31,10 +31,18 @@ set and no command is specified, all manpages are written to the directory. .SH OPTIONS INHERITED FROM PARENT COMMANDS +.PP +\fB\-\-cache\-dir\fP="" + set the cache directory + .PP \fB\-\-json\fP[=false] set output mode to JSON for commands that support it +.PP +\fB\-\-no\-cache\fP[=false] + do not use a local cache + .PP \fB\-\-no\-lock\fP[=false] do not lock the repo, this allows some operations on read\-only repos diff --git a/doc/man/restic-migrate.1 b/doc/man/restic-migrate.1 index f65a0aae5..b8a90a854 100644 --- a/doc/man/restic-migrate.1 +++ b/doc/man/restic-migrate.1 @@ -30,10 +30,18 @@ name is explicitly given, a list of migrations that can be applied is printed. 
.SH OPTIONS INHERITED FROM PARENT COMMANDS +.PP +\fB\-\-cache\-dir\fP="" + set the cache directory + .PP \fB\-\-json\fP[=false] set output mode to JSON for commands that support it +.PP +\fB\-\-no\-cache\fP[=false] + do not use a local cache + .PP \fB\-\-no\-lock\fP[=false] do not lock the repo, this allows some operations on read\-only repos diff --git a/doc/man/restic-mount.1 b/doc/man/restic-mount.1 index edf3e4da3..324208067 100644 --- a/doc/man/restic-mount.1 +++ b/doc/man/restic-mount.1 @@ -50,10 +50,18 @@ read\-only mount. .SH OPTIONS INHERITED FROM PARENT COMMANDS +.PP +\fB\-\-cache\-dir\fP="" + set the cache directory + .PP \fB\-\-json\fP[=false] set output mode to JSON for commands that support it +.PP +\fB\-\-no\-cache\fP[=false] + do not use a local cache + .PP \fB\-\-no\-lock\fP[=false] do not lock the repo, this allows some operations on read\-only repos diff --git a/doc/man/restic-prune.1 b/doc/man/restic-prune.1 index dfe390fe2..fde9a69d8 100644 --- a/doc/man/restic-prune.1 +++ b/doc/man/restic-prune.1 @@ -26,10 +26,18 @@ referenced and therefore not needed any more. .SH OPTIONS INHERITED FROM PARENT COMMANDS +.PP +\fB\-\-cache\-dir\fP="" + set the cache directory + .PP \fB\-\-json\fP[=false] set output mode to JSON for commands that support it +.PP +\fB\-\-no\-cache\fP[=false] + do not use a local cache + .PP \fB\-\-no\-lock\fP[=false] do not lock the repo, this allows some operations on read\-only repos diff --git a/doc/man/restic-rebuild-index.1 b/doc/man/restic-rebuild-index.1 index f406f8bc6..83baa4509 100644 --- a/doc/man/restic-rebuild-index.1 +++ b/doc/man/restic-rebuild-index.1 @@ -26,10 +26,18 @@ repository. 
.SH OPTIONS INHERITED FROM PARENT COMMANDS +.PP +\fB\-\-cache\-dir\fP="" + set the cache directory + .PP \fB\-\-json\fP[=false] set output mode to JSON for commands that support it +.PP +\fB\-\-no\-cache\fP[=false] + do not use a local cache + .PP \fB\-\-no\-lock\fP[=false] do not lock the repo, this allows some operations on read\-only repos diff --git a/doc/man/restic-restore.1 b/doc/man/restic-restore.1 index 86fcbbfc3..bef55324b 100644 --- a/doc/man/restic-restore.1 +++ b/doc/man/restic-restore.1 @@ -54,10 +54,18 @@ repository. .SH OPTIONS INHERITED FROM PARENT COMMANDS +.PP +\fB\-\-cache\-dir\fP="" + set the cache directory + .PP \fB\-\-json\fP[=false] set output mode to JSON for commands that support it +.PP +\fB\-\-no\-cache\fP[=false] + do not use a local cache + .PP \fB\-\-no\-lock\fP[=false] do not lock the repo, this allows some operations on read\-only repos diff --git a/doc/man/restic-snapshots.1 b/doc/man/restic-snapshots.1 index 21e9d9536..c71f16b3a 100644 --- a/doc/man/restic-snapshots.1 +++ b/doc/man/restic-snapshots.1 @@ -41,10 +41,18 @@ The "snapshots" command lists all snapshots stored in the repository. 
.SH OPTIONS INHERITED FROM PARENT COMMANDS +.PP +\fB\-\-cache\-dir\fP="" + set the cache directory + .PP \fB\-\-json\fP[=false] set output mode to JSON for commands that support it +.PP +\fB\-\-no\-cache\fP[=false] + do not use a local cache + .PP \fB\-\-no\-lock\fP[=false] do not lock the repo, this allows some operations on read\-only repos diff --git a/doc/man/restic-tag.1 b/doc/man/restic-tag.1 index 890fdad0b..031922ea8 100644 --- a/doc/man/restic-tag.1 +++ b/doc/man/restic-tag.1 @@ -56,10 +56,18 @@ When no snapshot\-ID is given, all snapshots matching the host, tag and path fil .SH OPTIONS INHERITED FROM PARENT COMMANDS +.PP +\fB\-\-cache\-dir\fP="" + set the cache directory + .PP \fB\-\-json\fP[=false] set output mode to JSON for commands that support it +.PP +\fB\-\-no\-cache\fP[=false] + do not use a local cache + .PP \fB\-\-no\-lock\fP[=false] do not lock the repo, this allows some operations on read\-only repos diff --git a/doc/man/restic-unlock.1 b/doc/man/restic-unlock.1 index 9b520d4b3..19430880c 100644 --- a/doc/man/restic-unlock.1 +++ b/doc/man/restic-unlock.1 @@ -29,10 +29,18 @@ The "unlock" command removes stale locks that have been created by other restic .SH OPTIONS INHERITED FROM PARENT COMMANDS +.PP +\fB\-\-cache\-dir\fP="" + set the cache directory + .PP \fB\-\-json\fP[=false] set output mode to JSON for commands that support it +.PP +\fB\-\-no\-cache\fP[=false] + do not use a local cache + .PP \fB\-\-no\-lock\fP[=false] do not lock the repo, this allows some operations on read\-only repos diff --git a/doc/man/restic-version.1 b/doc/man/restic-version.1 index 03599f39d..e1a8c2782 100644 --- a/doc/man/restic-version.1 +++ b/doc/man/restic-version.1 @@ -26,10 +26,18 @@ and the version of this software. 
.SH OPTIONS INHERITED FROM PARENT COMMANDS +.PP +\fB\-\-cache\-dir\fP="" + set the cache directory + .PP \fB\-\-json\fP[=false] set output mode to JSON for commands that support it +.PP +\fB\-\-no\-cache\fP[=false] + do not use a local cache + .PP \fB\-\-no\-lock\fP[=false] do not lock the repo, this allows some operations on read\-only repos diff --git a/doc/man/restic.1 b/doc/man/restic.1 index 492cf4c56..a07af6bd5 100644 --- a/doc/man/restic.1 +++ b/doc/man/restic.1 @@ -20,6 +20,10 @@ directories in an encrypted repository stored on different backends. .SH OPTIONS +.PP +\fB\-\-cache\-dir\fP="" + set the cache directory + .PP \fB\-h\fP, \fB\-\-help\fP[=false] help for restic @@ -28,6 +32,10 @@ directories in an encrypted repository stored on different backends. \fB\-\-json\fP[=false] set output mode to JSON for commands that support it +.PP +\fB\-\-no\-cache\fP[=false] + do not use a local cache + .PP \fB\-\-no\-lock\fP[=false] do not lock the repo, this allows some operations on read\-only repos diff --git a/doc/manual.rst b/doc/manual.rst index 6841deb21..2e79637b1 100644 --- a/doc/manual.rst +++ b/doc/manual.rst @@ -1240,3 +1240,19 @@ instead of the default, set the environment variable like this: $ export TMPDIR=/var/tmp/restic-tmp $ restic -r /tmp/backup backup ~/work + +Caching +------- + +Restic keeps a cache with some files from the repository on the local machine. +This allows faster operations, since metadata does not need to be loaded from +a remote repository. The cache is automatically created, usually in the +directory ``.cache/restic`` in the user's home directory. The environment +variable ``XDG_CACHE_HOME`` or the command line parameter ``--cache-dir`` can +each be used to specify where the cache is located. The parameter +``--no-cache`` disables the cache entirely. In this case, all data is loaded +from the repo. + +The cache is ephemeral: When a file cannot be read from the cache, it is loaded +from the repository. 
+ diff --git a/doc/references.rst b/doc/references.rst index 5c68a1f1d..c5672b769 100644 --- a/doc/references.rst +++ b/doc/references.rst @@ -7,3 +7,7 @@ References ------------------------ .. include:: rest_backend.rst + +------------------------ + +.. include:: cache.rst diff --git a/internal/cache/backend.go b/internal/cache/backend.go new file mode 100644 index 000000000..eeb67e1c0 --- /dev/null +++ b/internal/cache/backend.go @@ -0,0 +1,170 @@ +package cache + +import ( + "context" + "io" + + "github.com/restic/restic/internal/debug" + "github.com/restic/restic/internal/restic" +) + +// Backend wraps a restic.Backend and adds a cache. +type Backend struct { + restic.Backend + *Cache +} + +// ensure Backend implements restic.Backend +var _ restic.Backend = &Backend{} + +// Remove deletes a file from the backend and the cache if it has been cached. +func (b *Backend) Remove(ctx context.Context, h restic.Handle) error { + debug.Log("cache Remove(%v)", h) + err := b.Backend.Remove(ctx, h) + if err != nil { + return err + } + + return b.Cache.Remove(h) +} + +type teeReader struct { + rd io.Reader + wr io.Writer + err error +} + +func (t *teeReader) Read(p []byte) (n int, err error) { + n, err = t.rd.Read(p) + if t.err == nil && n > 0 { + _, t.err = t.wr.Write(p[:n]) + } + + return n, err +} + +var autoCacheTypes = map[restic.FileType]struct{}{ + restic.IndexFile: struct{}{}, + restic.SnapshotFile: struct{}{}, +} + +// Save stores a new file in the backend and the cache. 
+func (b *Backend) Save(ctx context.Context, h restic.Handle, rd io.Reader) (err error) { + if _, ok := autoCacheTypes[h.Type]; !ok { + return b.Backend.Save(ctx, h, rd) + } + + debug.Log("Save(%v): auto-store in the cache", h) + wr, err := b.Cache.SaveWriter(h) + if err != nil { + debug.Log("unable to save %v to cache: %v", h, err) + return b.Backend.Save(ctx, h, rd) + } + + tr := &teeReader{rd: rd, wr: wr} + err = b.Backend.Save(ctx, h, tr) + if err != nil { + wr.Close() + b.Cache.Remove(h) + return err + } + + err = wr.Close() + if err != nil { + debug.Log("cache writer returned error: %v", err) + _ = b.Cache.Remove(h) + } + return nil +} + +var autoCacheFiles = map[restic.FileType]bool{ + restic.IndexFile: true, + restic.SnapshotFile: true, +} + +func (b *Backend) cacheFile(ctx context.Context, h restic.Handle) error { + rd, err := b.Backend.Load(ctx, h, 0, 0) + if err != nil { + return err + } + + if err = b.Cache.Save(h, rd); err != nil { + return err + } + + if err = rd.Close(); err != nil { + // try to remove from the cache, ignore errors + _ = b.Cache.Remove(h) + return err + } + + return nil +} + +// Load loads a file from the cache or the backend. 
+func (b *Backend) Load(ctx context.Context, h restic.Handle, length int, offset int64) (io.ReadCloser, error) { + if b.Cache.Has(h) { + debug.Log("Load(%v, %v, %v) from cache", h, length, offset) + rd, err := b.Cache.Load(h, length, offset) + if err == nil { + return rd, nil + } + debug.Log("error loading %v from cache: %v", h, err) + } + + // partial file requested + if offset != 0 || length != 0 { + if b.Cache.PerformReadahead(h) { + debug.Log("performing readahead for %v", h) + err := b.cacheFile(ctx, h) + if err == nil { + return b.Cache.Load(h, length, offset) + } + + debug.Log("error caching %v: %v", h, err) + } + + debug.Log("Load(%v, %v, %v): partial file requested, delegating to backend", h, length, offset) + return b.Backend.Load(ctx, h, length, offset) + } + + // if we don't automatically cache this file type, fall back to the backend + if _, ok := autoCacheFiles[h.Type]; !ok { + debug.Log("Load(%v, %v, %v): delegating to backend", h, length, offset) + return b.Backend.Load(ctx, h, length, offset) + } + + debug.Log("auto-store %v in the cache", h) + err := b.cacheFile(ctx, h) + + if err == nil { + // load the cached version + return b.Cache.Load(h, 0, 0) + } + + debug.Log("error caching %v: %v, falling back to backend", h, err) + return b.Backend.Load(ctx, h, length, offset) +} + +// Stat tests whether the backend has a file. If it does not exist but still +// exists in the cache, it is removed from the cache. +func (b *Backend) Stat(ctx context.Context, h restic.Handle) (restic.FileInfo, error) { + debug.Log("cache Stat(%v)", h) + + fi, err := b.Backend.Stat(ctx, h) + if err != nil { + if b.Backend.IsNotExist(err) { + // try to remove from the cache, ignore errors + _ = b.Cache.Remove(h) + } + + return fi, err + } + + return fi, err +} + +// IsNotExist returns true if the error is caused by a non-existing file. 
+func (b *Backend) IsNotExist(err error) bool { + return b.Backend.IsNotExist(err) +} diff --git a/internal/cache/backend_test.go b/internal/cache/backend_test.go new file mode 100644 index 000000000..dc7270a89 --- /dev/null +++ b/internal/cache/backend_test.go @@ -0,0 +1,114 @@ +package cache + +import ( + "bytes" + "context" + "math/rand" + "testing" + + "github.com/restic/restic/internal/backend" + "github.com/restic/restic/internal/backend/mem" + "github.com/restic/restic/internal/restic" + "github.com/restic/restic/internal/test" +) + +func loadAndCompare(t testing.TB, be restic.Backend, h restic.Handle, data []byte) { + buf, err := backend.LoadAll(context.TODO(), be, h) + if err != nil { + t.Fatal(err) + } + + if len(buf) != len(data) { + t.Fatalf("wrong number of bytes read, want %v, got %v", len(data), len(buf)) + } + + if !bytes.Equal(buf, data) { + t.Fatalf("wrong data returned, want:\n %02x\ngot:\n %02x", data[:16], buf[:16]) + } +} + +func save(t testing.TB, be restic.Backend, h restic.Handle, data []byte) { + err := be.Save(context.TODO(), h, bytes.NewReader(data)) + if err != nil { + t.Fatal(err) + } +} + +func remove(t testing.TB, be restic.Backend, h restic.Handle) { + err := be.Remove(context.TODO(), h) + if err != nil { + t.Fatal(err) + } +} + +func randomData(n int) (restic.Handle, []byte) { + data := test.Random(rand.Int(), n) + id := restic.Hash(data) + copy(id[:], data) + h := restic.Handle{ + Type: restic.IndexFile, + Name: id.String(), + } + return h, data +} + +func TestBackend(t *testing.T) { + be := mem.New() + + c, cleanup := TestNewCache(t) + defer cleanup() + + wbe := c.Wrap(be) + + h, data := randomData(5234142) + + // save directly in backend + save(t, be, h, data) + if c.Has(h) { + t.Errorf("cache has file too early") + } + + // load data via cache + loadAndCompare(t, wbe, h, data) + if !c.Has(h) { + t.Errorf("cache dosen't have file after load") + } + + // remove via cache + remove(t, wbe, h) + if c.Has(h) { + t.Errorf("cache has 
file after remove") + } + + // save via cache + save(t, wbe, h, data) + if !c.Has(h) { + t.Errorf("cache dosen't have file after load") + } + + // load data directly from backend + loadAndCompare(t, be, h, data) + + // load data via cache + loadAndCompare(t, be, h, data) + + // remove directly + remove(t, be, h) + if !c.Has(h) { + t.Errorf("file not in cache any more") + } + + // run stat + _, err := wbe.Stat(context.TODO(), h) + if err == nil { + t.Errorf("expected error for removed file not found, got nil") + } + + if !wbe.IsNotExist(err) { + t.Errorf("Stat() returned error that does not match IsNotExist(): %v", err) + } + + if c.Has(h) { + t.Errorf("removed file still in cache after stat") + } +} diff --git a/internal/cache/cache.go b/internal/cache/cache.go new file mode 100644 index 000000000..6fb3de1f7 --- /dev/null +++ b/internal/cache/cache.go @@ -0,0 +1,167 @@ +package cache + +import ( + "fmt" + "io/ioutil" + "os" + "path/filepath" + "strconv" + + "github.com/pkg/errors" + "github.com/restic/restic/internal/debug" + "github.com/restic/restic/internal/fs" + "github.com/restic/restic/internal/restic" +) + +// Cache manages a local cache. 
+type Cache struct { + Path string + Base string + PerformReadahead func(restic.Handle) bool +} + +const dirMode = 0700 +const fileMode = 0600 + +func readVersion(dir string) (v uint, err error) { + buf, err := ioutil.ReadFile(filepath.Join(dir, "version")) + if os.IsNotExist(err) { + return 0, nil + } + + if err != nil { + return 0, errors.Wrap(err, "ReadFile") + } + + ver, err := strconv.ParseUint(string(buf), 10, 32) + if err != nil { + return 0, errors.Wrap(err, "ParseUint") + } + + return uint(ver), nil +} + +const cacheVersion = 1 + +// ensure Cache implements restic.Cache +var _ restic.Cache = &Cache{} + +var cacheLayoutPaths = map[restic.FileType]string{ + restic.DataFile: "data", + restic.SnapshotFile: "snapshots", + restic.IndexFile: "index", +} + +const cachedirTagSignature = "Signature: 8a477f597d28d172789f06886806bc55\n" + +func writeCachedirTag(dir string) error { + if err := fs.MkdirAll(dir, dirMode); err != nil { + return err + } + + f, err := fs.OpenFile(filepath.Join(dir, "CACHEDIR.TAG"), os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0644) + if err != nil { + if os.IsExist(errors.Cause(err)) { + return nil + } + + return errors.Wrap(err, "OpenFile") + } + + debug.Log("Create CACHEDIR.TAG at %v", dir) + if _, err := f.Write([]byte(cachedirTagSignature)); err != nil { + f.Close() + return errors.Wrap(err, "Write") + } + + return f.Close() +} + +// New returns a new cache for the repo ID at basedir. If basedir is the empty +// string, the default cache location (according to the XDG standard) is used. +// +// For partial files, the complete file is loaded and stored in the cache when +// performReadahead returns true. 
+func New(id string, basedir string) (c *Cache, err error) { + if basedir == "" { + basedir, err = getXDGCacheDir() + if err != nil { + return nil, err + } + } + + // create base dir and tag it as a cache directory + if err = writeCachedirTag(basedir); err != nil { + return nil, err + } + + cachedir := filepath.Join(basedir, id) + debug.Log("using cache dir %v", cachedir) + + v, err := readVersion(cachedir) + if err != nil { + return nil, err + } + + if v > cacheVersion { + return nil, errors.New("cache version is newer") + } + + // create the repo cache dir if it does not exist yet + if err = fs.MkdirAll(cachedir, dirMode); err != nil { + return nil, err + } + + if v < cacheVersion { + err = ioutil.WriteFile(filepath.Join(cachedir, "version"), []byte(fmt.Sprintf("%d", cacheVersion)), 0644) + if err != nil { + return nil, errors.Wrap(err, "WriteFile") + } + } + + for _, p := range cacheLayoutPaths { + if err = fs.MkdirAll(filepath.Join(cachedir, p), dirMode); err != nil { + return nil, err + } + } + + c = &Cache{ + Path: cachedir, + Base: basedir, + PerformReadahead: func(restic.Handle) bool { + // do not perform readahead by default + return false + }, + } + + return c, nil +} + +// errNoSuchFile is returned when a file is not cached. +type errNoSuchFile struct { + Type string + Name string +} + +func (e errNoSuchFile) Error() string { + return fmt.Sprintf("file %v (%v) is not cached", e.Name, e.Type) +} + +// IsNotExist returns true if the error was caused by a non-existing file. +func (c *Cache) IsNotExist(err error) bool { + _, ok := errors.Cause(err).(errNoSuchFile) + return ok +} + +// Wrap returns a backend with a cache. +func (c *Cache) Wrap(be restic.Backend) restic.Backend { + return &Backend{ + Backend: be, + Cache: c, + } +} + +// BaseDir returns the base directory. 
+func (c *Cache) BaseDir() string {
+	return c.Base
+}
diff --git a/internal/cache/dir.go b/internal/cache/dir.go
new file mode 100644
index 000000000..1f158b5fe
--- /dev/null
+++ b/internal/cache/dir.go
@@ -0,0 +1,49 @@
+package cache
+
+import (
+	"os"
+	"path/filepath"
+
+	"github.com/pkg/errors"
+	"github.com/restic/restic/internal/debug"
+	"github.com/restic/restic/internal/fs"
+)
+
+// getXDGCacheDir returns the cache directory according to XDG basedir spec, see
+// http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html
+//
+// $XDG_CACHE_HOME/restic is preferred, $HOME/.cache/restic is the fallback.
+// The directory is created if it does not exist yet; an error is returned if
+// neither variable is set or the path exists but is not a directory.
+func getXDGCacheDir() (string, error) {
+	xdgcache := os.Getenv("XDG_CACHE_HOME")
+	home := os.Getenv("HOME")
+
+	if xdgcache == "" && home == "" {
+		return "", errors.New("unable to locate cache directory (XDG_CACHE_HOME and HOME unset)")
+	}
+
+	// at least one of the two variables is set at this point
+	cachedir := filepath.Join(home, ".cache", "restic")
+	if xdgcache != "" {
+		cachedir = filepath.Join(xdgcache, "restic")
+	}
+
+	fi, err := fs.Stat(cachedir)
+	if os.IsNotExist(errors.Cause(err)) {
+		err = fs.MkdirAll(cachedir, 0700)
+		if err != nil {
+			return "", errors.Wrap(err, "MkdirAll")
+		}
+
+		fi, err = fs.Stat(cachedir)
+		debug.Log("create cache dir %v", cachedir)
+	}
+
+	if err != nil {
+		return "", errors.Wrap(err, "Stat")
+	}
+
+	if !fi.IsDir() {
+		return "", errors.Errorf("cache dir %v is not a directory", cachedir)
+	}
+
+	return cachedir, nil
+}
diff --git a/internal/cache/file.go b/internal/cache/file.go
new file mode 100644
index 000000000..4239a383c
--- /dev/null
+++ b/internal/cache/file.go
@@ -0,0 +1,207 @@
+package cache
+
+import (
+	"io"
+	"os"
+	"path/filepath"
+
+	"github.com/pkg/errors"
+	"github.com/restic/restic/internal/crypto"
+	"github.com/restic/restic/internal/debug"
+	"github.com/restic/restic/internal/fs"
+	"github.com/restic/restic/internal/restic"
+)
+
+// filename returns the path of the cache file for h. Files are spread over
+// sub directories named after the first two characters of the name; names
+// shorter than that cause a panic.
+func (c *Cache) filename(h restic.Handle) string {
+	if len(h.Name) < 2 {
+		panic("Name is empty or too short")
+	}
+	subdir := h.Name[:2]
+	return filepath.Join(c.Path, cacheLayoutPaths[h.Type], subdir, h.Name)
+}
+
+// canBeCached returns true if files of type t are stored in the cache. It is
+// safe to call on a nil *Cache, for which it returns false.
+func (c *Cache) canBeCached(t restic.FileType) bool {
+	if c == nil {
+		return false
+	}
+
+	_, ok := cacheLayoutPaths[t]
+	return ok
+}
+
+// readCloser combines a Reader and a Closer, so that reads can be limited
+// while Close still reaches the underlying file.
+type readCloser struct {
+	io.Reader
+	io.Closer
+}
+
+// Load returns a reader that yields the contents of the file with the
+// given handle. rd must be closed after use. If an error is returned, the
+// ReadCloser is nil.
+//
+// Reading starts at offset; if length is greater than zero, at most length
+// bytes are returned. A cached file that is not larger than crypto.Extension
+// is considered truncated and is removed from the cache.
+func (c *Cache) Load(h restic.Handle, length int, offset int64) (io.ReadCloser, error) {
+	debug.Log("Load from cache: %v", h)
+	if !c.canBeCached(h.Type) {
+		return nil, errors.New("cannot be cached")
+	}
+
+	f, err := fs.Open(c.filename(h))
+	if err != nil {
+		return nil, errors.Wrap(err, "Open")
+	}
+
+	fi, err := f.Stat()
+	if err != nil {
+		_ = f.Close()
+		return nil, errors.Wrap(err, "Stat")
+	}
+
+	if fi.Size() <= crypto.Extension {
+		_ = f.Close()
+		_ = c.Remove(h)
+		return nil, errors.New("cached file is truncated, removing")
+	}
+
+	if offset > 0 {
+		if _, err = f.Seek(offset, io.SeekStart); err != nil {
+			// the seek error is the one reported, closing is best-effort
+			_ = f.Close()
+			return nil, errors.Wrap(err, "Seek")
+		}
+	}
+
+	rd := readCloser{Reader: f, Closer: f}
+	if length > 0 {
+		rd.Reader = io.LimitReader(f, int64(length))
+	}
+
+	return rd, nil
+}
+
+// SaveWriter returns a writer for the cache object h. It must be closed after writing is finished.
+// The file is created exclusively, so an error is returned if h is already
+// present in the cache.
+func (c *Cache) SaveWriter(h restic.Handle) (io.WriteCloser, error) {
+	debug.Log("Save to cache: %v", h)
+	if !c.canBeCached(h.Type) {
+		return nil, errors.New("cannot be cached")
+	}
+
+	p := c.filename(h)
+	err := fs.MkdirAll(filepath.Dir(p), 0700)
+	if err != nil {
+		return nil, errors.Wrap(err, "MkdirAll")
+	}
+
+	f, err := fs.OpenFile(p, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0400)
+	if err != nil {
+		return nil, errors.Wrap(err, "Create")
+	}
+
+	return f, nil
+}
+
+// Save saves a file in the cache.
+func (c *Cache) Save(h restic.Handle, rd io.Reader) error {
+	debug.Log("Save to cache: %v", h)
+	if rd == nil {
+		return errors.New("Save() called with nil reader")
+	}
+
+	f, err := c.SaveWriter(h)
+	if err != nil {
+		return err
+	}
+
+	if _, err = io.Copy(f, rd); err != nil {
+		_ = f.Close()
+		_ = c.Remove(h)
+		return errors.Wrap(err, "Copy")
+	}
+
+	if err = f.Close(); err != nil {
+		// a failed Close can mean that not all data reached the disk, so
+		// the file must not stay in the cache
+		_ = c.Remove(h)
+		return errors.Wrap(err, "Close")
+	}
+
+	return nil
+}
+
+// Remove deletes a file. When the file is not cached, no error is returned.
+func (c *Cache) Remove(h restic.Handle) error {
+	if !c.Has(h) {
+		return nil
+	}
+
+	return fs.Remove(c.filename(h))
+}
+
+// Clear removes all files of type t from the cache that are not contained in
+// the set valid. For file types that are not cached, nothing is done. The
+// first file that cannot be removed aborts the operation.
+func (c *Cache) Clear(t restic.FileType, valid restic.IDSet) error {
+	debug.Log("Clearing cache for %v: %v valid files", t, len(valid))
+	if !c.canBeCached(t) {
+		return nil
+	}
+
+	list, err := c.list(t)
+	if err != nil {
+		return err
+	}
+
+	for id := range list {
+		if valid.Has(id) {
+			continue
+		}
+
+		if err = fs.Remove(c.filename(restic.Handle{Type: t, Name: id.String()})); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// isFile returns true if fi has none of the file type bits (directory,
+// symlink, device, ...) set in its mode.
+func isFile(fi os.FileInfo) bool {
+	return fi.Mode()&(os.ModeType|os.ModeCharDevice) == 0
+}
+
+// list returns a list of all files of type T in the cache.
+// Entries whose base name cannot be parsed as an ID are skipped silently.
+func (c *Cache) list(t restic.FileType) (restic.IDSet, error) {
+	if !c.canBeCached(t) {
+		return nil, errors.New("cannot be cached")
+	}
+
+	list := restic.NewIDSet()
+	dir := filepath.Join(c.Path, cacheLayoutPaths[t])
+	err := filepath.Walk(dir, func(name string, fi os.FileInfo, err error) error {
+		if err != nil {
+			return errors.Wrap(err, "Walk")
+		}
+
+		if !isFile(fi) {
+			return nil
+		}
+
+		id, err := restic.ParseID(filepath.Base(name))
+		if err != nil {
+			// not a cache file, ignore it
+			return nil
+		}
+
+		list.Insert(id)
+		return nil
+	})
+
+	return list, err
+}
+
+// Has returns true if the file is cached.
+func (c *Cache) Has(h restic.Handle) bool {
+	if !c.canBeCached(h.Type) {
+		return false
+	}
+
+	// any Stat error is treated as "not cached"
+	_, err := fs.Stat(c.filename(h))
+	return err == nil
+}
diff --git a/internal/cache/file_test.go b/internal/cache/file_test.go
new file mode 100644
index 000000000..35ce762c0
--- /dev/null
+++ b/internal/cache/file_test.go
@@ -0,0 +1,259 @@
+package cache
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"math/rand"
+	"testing"
+	"time"
+
+	"github.com/restic/restic/internal/restic"
+	"github.com/restic/restic/internal/test"
+)
+
+// generateRandomFiles saves between 10 and 24 files of type tpe with random
+// content in c and returns the set of their IDs.
+func generateRandomFiles(t testing.TB, tpe restic.FileType, c *Cache) restic.IDSet {
+	ids := restic.NewIDSet()
+
+	// draw the number of files once; evaluating rand.Intn in the loop
+	// condition would pick a new bound on every iteration
+	count := rand.Intn(15) + 10
+	for i := 0; i < count; i++ {
+		buf := test.Random(rand.Int(), 1<<19)
+		id := restic.Hash(buf)
+		h := restic.Handle{Type: tpe, Name: id.String()}
+
+		if c.Has(h) {
+			t.Errorf("index %v present before save", id)
+		}
+
+		err := c.Save(h, bytes.NewReader(buf))
+		if err != nil {
+			t.Fatal(err)
+		}
+		ids.Insert(id)
+	}
+	return ids
+}
+
+// randomID returns a random ID from s.
+func randomID(s restic.IDSet) restic.ID {
+	// the first element yielded by the iteration is used
+	for id := range s {
+		return id
+	}
+	panic("set is empty")
+}
+
+// load returns the complete content of the cached file h and fails the test
+// on any error.
+func load(t testing.TB, c *Cache, h restic.Handle) []byte {
+	rd, err := c.Load(h, 0, 0)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if rd == nil {
+		t.Fatalf("Load() returned nil reader")
+	}
+
+	buf, err := ioutil.ReadAll(rd)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if err = rd.Close(); err != nil {
+		t.Fatal(err)
+	}
+
+	return buf
+}
+
+// listFiles returns the IDs of all cached files of type tpe, a listing error
+// is reported via t.
+func listFiles(t testing.TB, c *Cache, tpe restic.FileType) restic.IDSet {
+	list, err := c.list(tpe)
+	if err != nil {
+		t.Errorf("listing failed: %v", err)
+	}
+
+	return list
+}
+
+// clearFiles removes all cached files of type tpe which are not in valid, an
+// error is reported via t.
+func clearFiles(t testing.TB, c *Cache, tpe restic.FileType, valid restic.IDSet) {
+	if err := c.Clear(tpe, valid); err != nil {
+		t.Error(err)
+	}
+}
+
+// TestFiles saves, loads, lists and clears files for every cacheable file
+// type.
+func TestFiles(t *testing.T) {
+	seed := time.Now().Unix()
+	t.Logf("seed is %v", seed)
+	rand.Seed(seed)
+
+	c, cleanup := TestNewCache(t)
+	defer cleanup()
+
+	var tests = []restic.FileType{
+		restic.SnapshotFile,
+		restic.DataFile,
+		restic.IndexFile,
+	}
+
+	for _, tpe := range tests {
+		t.Run(fmt.Sprintf("%v", tpe), func(t *testing.T) {
+			ids := generateRandomFiles(t, tpe, c)
+			id := randomID(ids)
+
+			h := restic.Handle{Type: tpe, Name: id.String()}
+			id2 := restic.Hash(load(t, c, h))
+
+			if !id.Equal(id2) {
+				t.Errorf("wrong data returned, want %v, got %v", id.Str(), id2.Str())
+			}
+
+			if !c.Has(h) {
+				t.Errorf("cache thinks index %v isn't present", id.Str())
+			}
+
+			list := listFiles(t, c, tpe)
+			if !ids.Equals(list) {
+				t.Errorf("wrong list of index IDs returned, want:\n %v\ngot:\n %v", ids, list)
+			}
+
+			// only the file id may survive clearing with {id} as valid set
+			clearFiles(t, c, tpe, restic.NewIDSet(id))
+			list2 := listFiles(t, c, tpe)
+			ids.Delete(id)
+			want := restic.NewIDSet(id)
+			if !list2.Equals(want) {
+				t.Errorf("ClearIndexes removed indexes, want:\n %v\ngot:\n %v", want, list2)
+			}
+
+			// an empty valid set must leave nothing behind
+			clearFiles(t, c, tpe, restic.NewIDSet())
+			want = restic.NewIDSet()
+			list3 := listFiles(t, c, tpe)
+			if !list3.Equals(want) {
+				t.Errorf("ClearIndexes returned a wrong list, want:\n %v\ngot:\n %v", want, list3)
+			}
+		})
+	}
+}
+
+// TestFileSaveWriter writes a file through SaveWriter and checks that Load
+// returns the same data.
+func TestFileSaveWriter(t *testing.T) {
+	seed := time.Now().Unix()
+	t.Logf("seed is %v", seed)
+	rand.Seed(seed)
+
+	c, cleanup := TestNewCache(t)
+	defer cleanup()
+
+	// save about 5 MiB of data in the cache
+	data := test.Random(rand.Int(), 5234142)
+	id := restic.ID{}
+	copy(id[:], data)
+	h := restic.Handle{
+		Type: restic.DataFile,
+		Name: id.String(),
+	}
+
+	wr, err := c.SaveWriter(h)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	n, err := io.Copy(wr, bytes.NewReader(data))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if n != int64(len(data)) {
+		t.Fatalf("wrong number of bytes written, want %v, got %v", len(data), n)
+	}
+
+	if err = wr.Close(); err != nil {
+		t.Fatal(err)
+	}
+
+	rd, err := c.Load(h, 0, 0)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	buf, err := ioutil.ReadAll(rd)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if len(buf) != len(data) {
+		t.Fatalf("wrong number of bytes read, want %v, got %v", len(data), len(buf))
+	}
+
+	if !bytes.Equal(buf, data) {
+		t.Fatalf("wrong data returned, want:\n %02x\ngot:\n %02x", data[:16], buf[:16])
+	}
+
+	if err = rd.Close(); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// TestFileLoad loads a cached file with several offset/length combinations
+// and compares the result with the matching part of the saved data.
+func TestFileLoad(t *testing.T) {
+	seed := time.Now().Unix()
+	t.Logf("seed is %v", seed)
+	rand.Seed(seed)
+
+	c, cleanup := TestNewCache(t)
+	defer cleanup()
+
+	// save about 5 MiB of data in the cache
+	data := test.Random(rand.Int(), 5234142)
+	id := restic.ID{}
+	copy(id[:], data)
+	h := restic.Handle{
+		Type: restic.DataFile,
+		Name: id.String(),
+	}
+	if err := c.Save(h, bytes.NewReader(data)); err != nil {
+		t.Fatalf("Save() returned error: %v", err)
+	}
+
+	var tests = []struct {
+		offset int64
+		length int
+	}{
+		{0, 0},
+		{5, 0},
+		{32*1024 + 5, 0},
+		{0, 123},
+		{0, 64*1024 + 234},
+		{100, 5234142},
+	}
+
+	for _, test := range tests {
+		t.Run(fmt.Sprintf("%v/%v", test.length, test.offset), func(t *testing.T) {
+			rd, err := c.Load(h, test.length, test.offset)
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			buf, err := ioutil.ReadAll(rd)
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			if err = rd.Close(); err != nil {
+				t.Fatal(err)
+			}
+
+			// a length of zero, or one reaching beyond the end of the
+			// file, yields everything from the offset to the end
+			o := int(test.offset)
+			l := test.length
+			if test.length == 0 {
+				l = len(data) - o
+			}
+
+			if l > len(data)-o {
+				l = len(data) - o
+			}
+
+			if len(buf) != l {
+				t.Fatalf("wrong number of bytes returned: want %d, got %d", l, len(buf))
+			}
+
+			if !bytes.Equal(buf, data[o:o+l]) {
+				t.Fatalf("wrong data returned, want:\n %02x\ngot:\n %02x", data[o:o+16], buf[:16])
+			}
+		})
+	}
+}
diff --git a/internal/cache/testing.go b/internal/cache/testing.go
new file mode 100644
index 000000000..b3156374d
--- /dev/null
+++ b/internal/cache/testing.go
@@ -0,0 +1,20 @@
+package cache
+
+import (
+	"testing"
+
+	"github.com/restic/restic/internal/restic"
+	"github.com/restic/restic/internal/test"
+)
+
+// TestNewCache returns a cache in a temporary directory which is removed when
+// cleanup is called.
+func TestNewCache(t testing.TB) (*Cache, func()) {
+	dir, cleanup := test.TempDir(t)
+	t.Logf("created new cache at %v", dir)
+	cache, err := New(restic.NewRandomID().String(), dir)
+	if err != nil {
+		// the caller never gets the cleanup function in this case, so the
+		// temporary directory has to be removed here
+		cleanup()
+		t.Fatal(err)
+	}
+	return cache, cleanup
+}
diff --git a/internal/repository/index.go b/internal/repository/index.go
index 62d922fce..70664fad0 100644
--- a/internal/repository/index.go
+++ b/internal/repository/index.go
@@ -15,8 +15,9 @@ import (
 
 // Index holds a lookup table for id -> pack.
 type Index struct {
-	m    sync.Mutex
-	pack map[restic.BlobHandle][]indexEntry
+	m         sync.Mutex
+	pack      map[restic.BlobHandle][]indexEntry
+	treePacks restic.IDs
 
 	final      bool      // set to true for all indexes read from the backend ("finalized")
 	id         restic.ID // set to the ID of the index when it's finalized
@@ -437,6 +438,11 @@ func (idx *Index) Dump(w io.Writer) error {
 	return nil
 }
 
+// TreePacks returns a list of packs that contain only tree blobs.
+func (idx *Index) TreePacks() restic.IDs { + return idx.treePacks +} + // isErrOldIndex returns true if the error may be caused by an old index // format. func isErrOldIndex(err error) bool { @@ -469,6 +475,8 @@ func DecodeIndex(buf []byte) (idx *Index, err error) { idx = NewIndex() for _, pack := range idxJSON.Packs { + var data, tree bool + for _, blob := range pack.Blobs { idx.store(restic.PackedBlob{ Blob: restic.Blob{ @@ -479,6 +487,17 @@ func DecodeIndex(buf []byte) (idx *Index, err error) { }, PackID: pack.ID, }) + + switch blob.Type { + case restic.DataBlob: + data = true + case restic.TreeBlob: + tree = true + } + } + + if !data && tree { + idx.treePacks = append(idx.treePacks, pack.ID) } } idx.supersedes = idxJSON.Supersedes @@ -501,6 +520,8 @@ func DecodeOldIndex(buf []byte) (idx *Index, err error) { idx = NewIndex() for _, pack := range list { + var data, tree bool + for _, blob := range pack.Blobs { idx.store(restic.PackedBlob{ Blob: restic.Blob{ @@ -511,6 +532,17 @@ func DecodeOldIndex(buf []byte) (idx *Index, err error) { }, PackID: pack.ID, }) + + switch blob.Type { + case restic.DataBlob: + data = true + case restic.TreeBlob: + tree = true + } + } + + if !data && tree { + idx.treePacks = append(idx.treePacks, pack.ID) } } idx.final = true diff --git a/internal/repository/packer_manager.go b/internal/repository/packer_manager.go index aa1885cc1..3b905903c 100644 --- a/internal/repository/packer_manager.go +++ b/internal/repository/packer_manager.go @@ -89,8 +89,8 @@ func (r *packerManager) insertPacker(p *Packer) { } // savePacker stores p in the backend. 
-func (r *Repository) savePacker(p *Packer) error { - debug.Log("save packer with %d blobs (%d bytes)\n", p.Packer.Count(), p.Packer.Size()) +func (r *Repository) savePacker(t restic.BlobType, p *Packer) error { + debug.Log("save packer for %v with %d blobs (%d bytes)\n", t, p.Packer.Count(), p.Packer.Size()) _, err := p.Packer.Finalize() if err != nil { return err @@ -112,6 +112,20 @@ func (r *Repository) savePacker(p *Packer) error { debug.Log("saved as %v", h) + if t == restic.TreeBlob && r.Cache != nil { + debug.Log("saving tree pack file in cache") + + _, err = p.tmpfile.Seek(0, 0) + if err != nil { + return errors.Wrap(err, "Seek") + } + + err := r.Cache.Save(h, p.tmpfile) + if err != nil { + return err + } + } + err = p.tmpfile.Close() if err != nil { return errors.Wrap(err, "close tempfile") diff --git a/internal/repository/repository.go b/internal/repository/repository.go index ae35f78c0..96eb932d9 100644 --- a/internal/repository/repository.go +++ b/internal/repository/repository.go @@ -7,6 +7,7 @@ import ( "fmt" "os" + "github.com/restic/restic/internal/cache" "github.com/restic/restic/internal/errors" "github.com/restic/restic/internal/restic" @@ -23,6 +24,7 @@ type Repository struct { key *crypto.Key keyName string idx *MasterIndex + restic.Cache treePM *packerManager dataPM *packerManager @@ -45,6 +47,16 @@ func (r *Repository) Config() restic.Config { return r.cfg } +// UseCache replaces the backend with the wrapped cache. +func (r *Repository) UseCache(c restic.Cache) { + if c == nil { + return + } + debug.Log("using cache") + r.Cache = c + r.be = c.Wrap(r.be) +} + // PrefixLength returns the number of bytes required so that all prefixes of // all IDs of type t are unique. func (r *Repository) PrefixLength(t restic.FileType) (int, error) { @@ -53,11 +65,11 @@ func (r *Repository) PrefixLength(t restic.FileType) (int, error) { // LoadAndDecrypt loads and decrypts data identified by t and id from the // backend. 
-func (r *Repository) LoadAndDecrypt(ctx context.Context, t restic.FileType, id restic.ID) ([]byte, error) { +func (r *Repository) LoadAndDecrypt(ctx context.Context, t restic.FileType, id restic.ID) (buf []byte, err error) { debug.Log("load %v with id %v", t, id.Str()) h := restic.Handle{Type: t, Name: id.String()} - buf, err := backend.LoadAll(ctx, r.be, h) + buf, err = backend.LoadAll(ctx, r.be, h) if err != nil { debug.Log("error loading %v: %v", h, err) return nil, err @@ -76,6 +88,26 @@ func (r *Repository) LoadAndDecrypt(ctx context.Context, t restic.FileType, id r return buf[:n], nil } +// sortCachedPacks moves all cached pack files to the front of blobs. +func (r *Repository) sortCachedPacks(blobs []restic.PackedBlob) []restic.PackedBlob { + if r.Cache == nil { + return blobs + } + + cached := make([]restic.PackedBlob, 0, len(blobs)/2) + noncached := make([]restic.PackedBlob, 0, len(blobs)/2) + + for _, blob := range blobs { + if r.Cache.Has(restic.Handle{Type: restic.DataFile, Name: blob.PackID.String()}) { + cached = append(cached, blob) + continue + } + noncached = append(noncached, blob) + } + + return append(cached, noncached...) +} + // loadBlob tries to load and decrypt content identified by t and id from a // pack from the backend, the result is stored in plaintextBuf, which must be // large enough to hold the complete blob. 
@@ -89,9 +121,12 @@ func (r *Repository) loadBlob(ctx context.Context, id restic.ID, t restic.BlobTy return 0, err } + // try cached pack files first + blobs = r.sortCachedPacks(blobs) + var lastError error for _, blob := range blobs { - debug.Log("id %v found: %v", id.Str(), blob) + debug.Log("blob %v/%v found: %v", t, id.Str(), blob) if blob.Type != t { debug.Log("blob %v has wrong block type, want %v", blob, t) @@ -212,7 +247,7 @@ func (r *Repository) SaveAndEncrypt(ctx context.Context, t restic.BlobType, data } // else write the pack to the backend - return *id, r.savePacker(packer) + return *id, r.savePacker(t, packer) } // SaveJSONUnpacked serialises item as JSON and encrypts and saves it in the @@ -251,20 +286,27 @@ func (r *Repository) SaveUnpacked(ctx context.Context, t restic.FileType, p []by // Flush saves all remaining packs. func (r *Repository) Flush() error { - for _, pm := range []*packerManager{r.dataPM, r.treePM} { - pm.pm.Lock() + pms := []struct { + t restic.BlobType + pm *packerManager + }{ + {restic.DataBlob, r.dataPM}, + {restic.TreeBlob, r.treePM}, + } - debug.Log("manually flushing %d packs", len(pm.packers)) - for _, p := range pm.packers { - err := r.savePacker(p) + for _, p := range pms { + p.pm.pm.Lock() + + debug.Log("manually flushing %d packs", len(p.pm.packers)) + for _, packer := range p.pm.packers { + err := r.savePacker(p.t, packer) if err != nil { - pm.pm.Unlock() + p.pm.pm.Unlock() return err } } - pm.packers = pm.packers[:0] - - pm.pm.Unlock() + p.pm.packers = p.pm.packers[:0] + p.pm.pm.Unlock() } return nil @@ -353,10 +395,58 @@ func (r *Repository) LoadIndex(ctx context.Context) error { ParallelWorkFuncParseID(worker)) }() + validIndex := restic.NewIDSet() for idx := range indexes { + id, err := idx.ID() + if err == nil { + validIndex.Insert(id) + } r.idx.Insert(idx) } + if r.Cache != nil { + // clear old index files + err := r.Cache.Clear(restic.IndexFile, validIndex) + if err != nil { + fmt.Fprintf(os.Stderr, "error 
clearing index files in cache: %v\n", err)
+		}
+
+		packs := restic.NewIDSet()
+		for _, idx := range r.idx.All() {
+			for id := range idx.Packs() {
+				packs.Insert(id)
+			}
+		}
+
+		// clear old data files
+		err = r.Cache.Clear(restic.DataFile, packs)
+		if err != nil {
+			fmt.Fprintf(os.Stderr, "error clearing data files in cache: %v\n", err)
+		}
+
+		treePacks := restic.NewIDSet()
+		for _, idx := range r.idx.All() {
+			for _, id := range idx.TreePacks() {
+				treePacks.Insert(id)
+			}
+		}
+
+		// use readahead for pack files which only contain tree blobs; this
+		// is only possible for the cache implementation from the cache
+		// package, any other restic.Cache is left as it is instead of
+		// panicking on the type assertion
+		if c, ok := r.Cache.(*cache.Cache); ok {
+			c.PerformReadahead = func(h restic.Handle) bool {
+				if h.Type != restic.DataFile {
+					return false
+				}
+
+				id, err := restic.ParseID(h.Name)
+				if err != nil {
+					return false
+				}
+
+				return treePacks.Has(id)
+			}
+		}
+	}
+
 	if err := <-errCh; err != nil {
 		return err
 	}
diff --git a/internal/restic/cache.go b/internal/restic/cache.go
new file mode 100644
index 000000000..56ed060ac
--- /dev/null
+++ b/internal/restic/cache.go
@@ -0,0 +1,37 @@
+package restic
+
+import "io"
+
+// Cache manages a local cache.
+type Cache interface {
+	// BaseDir returns the base directory of the cache.
+	BaseDir() string
+
+	// Wrap returns a backend with a cache.
+	Wrap(Backend) Backend
+
+	// IsNotExist returns true if the error was caused by a non-existing file.
+	IsNotExist(err error) bool
+
+	// Load returns a reader that yields the contents of the file with the
+	// given handle if it is cached. rd must be closed after use. If an error
+	// is returned, the ReadCloser is nil. The files are still encrypted.
+	Load(h Handle, length int, offset int64) (io.ReadCloser, error)
+
+	// Save saves the data read from the reader as file h in the cache.
+	Save(Handle, io.Reader) error
+
+	// SaveWriter returns a writer for the to be cached object h. It must be
+	// closed after writing is finished.
+	SaveWriter(Handle) (io.WriteCloser, error)
+
+	// Remove deletes a single file from the cache. If it isn't cached, this
+	// function must return no error.
+	Remove(Handle) error
+
+	// Clear removes all files of type t from the cache that are not contained in the set.
+	Clear(FileType, IDSet) error
+
+	// Has returns true if the file is cached.
+	Has(Handle) bool
+}
diff --git a/internal/restic/readerat.go b/internal/restic/readerat.go
index 7b5a01fa6..b03fa4687 100644
--- a/internal/restic/readerat.go
+++ b/internal/restic/readerat.go
@@ -5,6 +5,7 @@ import (
 	"io"
 
 	"github.com/restic/restic/internal/debug"
+	"github.com/restic/restic/internal/errors"
 )
 
 type backendReaderAt struct {
@@ -37,5 +38,5 @@ func ReadAt(ctx context.Context, be Backend, h Handle, offset int64, p []byte) (
 
 	debug.Log("ReadAt(%v) ReadFull returned %v bytes", h, n)
 
-	return n, err
+	return n, errors.Wrapf(err, "ReadFull(%v)", h)
 }