From 7fd52f9f5701151bffa7fc5a9503cc8024cac27c Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Mon, 20 Jul 2015 00:13:39 +0200 Subject: [PATCH] Add exclude filter to archiver and 'backup' command --- archiver.go | 19 +++++-- cmd/restic/cmd_backup.go | 24 ++++++--- cmd/restic/cmd_ls.go | 18 ++++--- cmd/restic/integration_test.go | 97 +++++++++++++++++++++++++++++++++- filter/filter.go | 4 +- filter/filter_test.go | 6 +-- pipe/pipe.go | 28 +++++++--- pipe/pipe_test.go | 12 +++-- walk_test.go | 7 ++- 9 files changed, 179 insertions(+), 36 deletions(-) diff --git a/archiver.go b/archiver.go index 69b1d0b4a..1182a85c5 100644 --- a/archiver.go +++ b/archiver.go @@ -34,8 +34,8 @@ type Archiver struct { blobToken chan struct{} - Error func(dir string, fi os.FileInfo, err error) error - Filter func(item string, fi os.FileInfo) bool + Error func(dir string, fi os.FileInfo, err error) error + SelectFilter pipe.SelectFunc } // NewArchiver returns a new archiver. @@ -50,7 +50,7 @@ func NewArchiver(repo *repository.Repository) *Archiver { } arch.Error = archiverAbortOnAllErrors - arch.Filter = archiverAllowAllFiles + arch.SelectFilter = archiverAllowAllFiles return arch } @@ -577,7 +577,7 @@ func (arch *Archiver) Snapshot(p *Progress, paths []string, parentID backend.ID) pipeCh := make(chan pipe.Job) resCh := make(chan pipe.Result, 1) go func() { - err := pipe.Walk(paths, done, pipeCh, resCh) + err := pipe.Walk(paths, arch.SelectFilter, done, pipeCh, resCh) if err != nil { debug.Log("Archiver.Snapshot", "pipe.Walk returned error %v", err) return @@ -659,7 +659,7 @@ func isRegularFile(fi os.FileInfo) bool { // Scan traverses the dirs to collect Stat information while emitting progress // information with p. -func Scan(dirs []string, p *Progress) (Stat, error) { +func Scan(dirs []string, filter pipe.SelectFunc, p *Progress) (Stat, error) { p.Start() defer p.Done() @@ -678,6 +678,15 @@ func Scan(dirs []string, p *Progress) (Stat, error) { fmt.Fprintf(os.Stderr, "error for %v: FileInfo is nil\n", str) return nil } + + if !filter(str, fi) { + debug.Log("Scan.Walk", "path %v excluded", str) + if fi.IsDir() { + return filepath.SkipDir + } + return nil + } + s := Stat{} if fi.IsDir() { s.Dirs++ diff --git a/cmd/restic/cmd_backup.go b/cmd/restic/cmd_backup.go index 99c91aef3..3e18c3402 100644 --- a/cmd/restic/cmd_backup.go +++ b/cmd/restic/cmd_backup.go @@ -10,13 +10,15 @@ import ( "github.com/restic/restic" "github.com/restic/restic/backend" + "github.com/restic/restic/filter" "github.com/restic/restic/repository" "golang.org/x/crypto/ssh/terminal" ) type CmdBackup struct { - Parent string `short:"p" long:"parent" description:"use this parent snapshot (default: last snapshot in repo that has the same target)"` - Force bool `short:"f" long:"force" description:"Force re-reading the target. Overrides the \"parent\" flag"` + Parent string `short:"p" long:"parent" description:"use this parent snapshot (default: last snapshot in repo that has the same target)"` + Force bool `short:"f" long:"force" description:"Force re-reading the target. Overrides the \"parent\" flag"` + Exclude []string `short:"e" long:"exclude" description:"Exclude a pattern (can be specified multiple times)"` global *GlobalOptions } @@ -282,14 +284,22 @@ func (cmd CmdBackup) Execute(args []string) error { cmd.global.Verbosef("scan %v\n", target) - stat, err := restic.Scan(target, cmd.newScanProgress()) + selectFilter := func(item string, fi os.FileInfo) bool { + matched, err := filter.List(cmd.Exclude, item) + if err != nil { + cmd.global.Warnf("error for exclude pattern: %v", err) + } - // TODO: add filter - // arch.Filter = func(dir string, fi os.FileInfo) bool { - // return true - // } + return !matched + } + + stat, err := restic.Scan(target, selectFilter, cmd.newScanProgress()) + if err != nil { + return err + } arch := restic.NewArchiver(repo) + arch.SelectFilter = selectFilter arch.Error = func(dir string, fi os.FileInfo, err error) error { // TODO: make ignoring errors configurable diff --git a/cmd/restic/cmd_ls.go b/cmd/restic/cmd_ls.go index 8ec904bbd..91c9507e3 100644 --- a/cmd/restic/cmd_ls.go +++ b/cmd/restic/cmd_ls.go @@ -11,6 +11,8 @@ import ( ) type CmdLs struct { + Long bool `short:"l" long:"long" description:"Use a long listing format showing size and mode"` + global *GlobalOptions } @@ -24,7 +26,11 @@ func init() { } } -func printNode(prefix string, n *restic.Node) string { +func (cmd CmdLs) printNode(prefix string, n *restic.Node) string { + if !cmd.Long { + return filepath.Join(prefix, n.Name) + } + switch n.Type { case "file": return fmt.Sprintf("%s %5d %5d %6d %s %s", @@ -40,17 +46,17 @@ func printNode(prefix string, n *restic.Node) string { } } -func printTree(prefix string, repo *repository.Repository, id backend.ID) error { +func (cmd CmdLs) printTree(prefix string, repo *repository.Repository, id backend.ID) error { tree, err := restic.LoadTree(repo, id) if err != nil { return err } for _, entry := range tree.Nodes { - fmt.Println(printNode(prefix, entry)) + cmd.global.Printf(cmd.printNode(prefix, entry) + "\n") if entry.Type == "dir" && entry.Subtree != nil { - err = printTree(filepath.Join(prefix, entry.Name), repo, entry.Subtree) + err = cmd.printTree(filepath.Join(prefix, entry.Name), repo, entry.Subtree) if err != nil { return err } @@ -89,7 +95,7 @@ func (cmd CmdLs) Execute(args []string) error { return err } - fmt.Printf("snapshot of %v at %s:\n", sn.Paths, sn.Time) + cmd.global.Verbosef("snapshot of %v at %s:\n", sn.Paths, sn.Time) - return printTree("", repo, sn.Tree) + return cmd.printTree("", repo, sn.Tree) } diff --git a/cmd/restic/integration_test.go b/cmd/restic/integration_test.go index b06b85745..21323e8cc 100644 --- a/cmd/restic/integration_test.go +++ b/cmd/restic/integration_test.go @@ -10,6 +10,7 @@ import ( "os" "path/filepath" "regexp" + "strings" "syscall" "testing" "time" @@ -44,7 +45,11 @@ func cmdInit(t testing.TB, global GlobalOptions) { } func cmdBackup(t testing.TB, global GlobalOptions, target []string, parentID backend.ID) { - cmd := &CmdBackup{global: &global} + cmdBackupExcludes(t, global, target, parentID, nil) +} + +func cmdBackupExcludes(t testing.TB, global GlobalOptions, target []string, parentID backend.ID, excludes []string) { + cmd := &CmdBackup{global: &global, Exclude: excludes} cmd.Parent = parentID.String() t.Logf("backing up %v", target) @@ -73,6 +78,16 @@ func cmdCheck(t testing.TB, global GlobalOptions) { OK(t, cmd.Execute(nil)) } +func cmdLs(t testing.TB, global GlobalOptions, snapshotID string) []string { + var buf bytes.Buffer + global.stdout = &buf + + cmd := &CmdLs{global: &global} + OK(t, cmd.Execute([]string{snapshotID})) + + return strings.Split(string(buf.Bytes()), "\n") +} + func TestBackup(t *testing.T) { withTestEnvironment(t, func(env *testEnvironment, global GlobalOptions) { datafile := filepath.Join("testdata", "backup-data.tar.gz") @@ -237,6 +252,86 @@ func TestBackupMissingFile2(t *testing.T) { }) } +func includes(haystack []string, needle string) bool { + for _, s := range haystack { + if s == needle { + return true + } + } + + return false +} + +func loadSnapshotMap(t testing.TB, global GlobalOptions) map[string]struct{} { + snapshotIDs := cmdList(t, global, "snapshots") + + m := make(map[string]struct{}) + for _, id := range snapshotIDs { + m[id.String()] = struct{}{} + } + + return m +} + +func lastSnapshot(old, new map[string]struct{}) (map[string]struct{}, string) { + for k := range new { + if _, ok := old[k]; !ok { + old[k] = struct{}{} + return old, k + } + } + + return old, "" +} + +var backupExcludeFilenames = []string{ + "testfile1", + "foo.tar.gz", + "private/secret/passwords.txt", + "work/source/test.c", +} + +func TestBackupExclude(t *testing.T) { + withTestEnvironment(t, func(env *testEnvironment, global GlobalOptions) { + cmdInit(t, global) + + datadir := filepath.Join(env.base, "testdata") + + for _, filename := range backupExcludeFilenames { + fp := filepath.Join(datadir, filename) + OK(t, os.MkdirAll(filepath.Dir(fp), 0755)) + + f, err := os.Create(fp) + OK(t, err) + + fmt.Fprintf(f, filename) + OK(t, f.Close()) + } + + snapshots := make(map[string]struct{}) + + cmdBackup(t, global, []string{datadir}, nil) + snapshots, snapshotID := lastSnapshot(snapshots, loadSnapshotMap(t, global)) + files := cmdLs(t, global, snapshotID) + Assert(t, includes(files, filepath.Join("testdata", "foo.tar.gz")), + "expected file %q in first snapshot, but it's not included", "foo.tar.gz") + + cmdBackupExcludes(t, global, []string{datadir}, nil, []string{"*.tar.gz"}) + snapshots, snapshotID = lastSnapshot(snapshots, loadSnapshotMap(t, global)) + files = cmdLs(t, global, snapshotID) + Assert(t, !includes(files, filepath.Join("testdata", "foo.tar.gz")), + "expected file %q not in first snapshot, but it's included", "foo.tar.gz") + + cmdBackupExcludes(t, global, []string{datadir}, nil, []string{"*.tar.gz", "private/secret"}) + snapshots, snapshotID = lastSnapshot(snapshots, loadSnapshotMap(t, global)) + files = cmdLs(t, global, snapshotID) + Assert(t, !includes(files, filepath.Join("testdata", "foo.tar.gz")), + "expected file %q not in first snapshot, but it's included", "foo.tar.gz") + Assert(t, !includes(files, filepath.Join("testdata", "private", "secret", "passwords.txt")), + "expected file %q not in first snapshot, but it's included", "passwords.txt") + }) +} + const ( incrementalFirstWrite = 20 * 1042 * 1024 incrementalSecondWrite = 12 * 1042 * 1024 diff --git a/filter/filter.go b/filter/filter.go index 274d1f960..f8c335e34 100644 --- a/filter/filter.go +++ b/filter/filter.go @@ -91,8 +91,8 @@ func match(patterns, strs []string) (matched bool, err error) { return false, nil } -// MatchList returns true if str matches one of the patterns. -func MatchList(patterns []string, str string) (matched bool, err error) { +// List returns true if str matches one of the patterns. +func List(patterns []string, str string) (matched bool, err error) { for _, pat := range patterns { matched, err = Match(pat, str) if err != nil { diff --git a/filter/filter_test.go b/filter/filter_test.go index ce9474efc..78e731b68 100644 --- a/filter/filter_test.go +++ b/filter/filter_test.go @@ -121,7 +121,7 @@ var filterListTests = []struct { func TestMatchList(t *testing.T) { for i, test := range filterListTests { - match, err := filter.MatchList(test.patterns, test.path) + match, err := filter.List(test.patterns, test.path) if err != nil { t.Errorf("test %d failed: expected no error for patterns %q, but error returned: %v", i, test.patterns, err) @@ -136,7 +136,7 @@ func TestMatchList(t *testing.T) { } func ExampleMatchList() { - match, _ := filter.MatchList([]string{"*.c", "*.go"}, "/home/user/file.go") + match, _ := filter.List([]string{"*.c", "*.go"}, "/home/user/file.go") fmt.Printf("match: %v\n", match) // Output: // match: true @@ -235,7 +235,7 @@ func BenchmarkFilterPatterns(b *testing.B) { for i := 0; i < b.N; i++ { c = 0 for _, line := range lines { - match, err := filter.MatchList(patterns, line) + match, err := filter.List(patterns, line) if err != nil { b.Fatal(err) } diff --git a/pipe/pipe.go b/pipe/pipe.go index a419f082d..4e9908315 100644 --- a/pipe/pipe.go +++ b/pipe/pipe.go @@ -82,13 +82,22 @@ func isFile(fi os.FileInfo) bool { var errCancelled = errors.New("walk cancelled") -func walk(basedir, dir string, done chan struct{}, jobs chan<- Job, res chan<- Result) error { +// SelectFunc returns true for all items that should be included (files and +// dirs). If false is returned, files are ignored and dirs are not even walked. +type SelectFunc func(item string, fi os.FileInfo) bool + +func walk(basedir, dir string, selectFunc SelectFunc, done chan struct{}, jobs chan<- Job, res chan<- Result) error { info, err := os.Lstat(dir) if err != nil { debug.Log("pipe.walk", "error for %v: %v", dir, err) return err } + if !selectFunc(dir, info) { + debug.Log("pipe.walk", "file %v excluded by filter", dir) + return nil + } + relpath, _ := filepath.Rel(basedir, dir) if !info.IsDir() { @@ -114,13 +123,18 @@ func walk(basedir, dir string, done chan struct{}, jobs chan<- Job, res chan<- R for _, name := range names { subpath := filepath.Join(dir, name) + fi, statErr := os.Lstat(subpath) + if !selectFunc(subpath, fi) { + debug.Log("pipe.walk", "file %v excluded by filter", subpath) + continue + } + ch := make(chan Result, 1) entries = append(entries, ch) - fi, err := os.Lstat(subpath) - if err != nil { + if statErr != nil { select { - case jobs <- Entry{info: fi, error: err, basedir: basedir, path: filepath.Join(relpath, name), result: ch}: + case jobs <- Entry{info: fi, error: statErr, basedir: basedir, path: filepath.Join(relpath, name), result: ch}: case <-done: return errCancelled } @@ -132,7 +146,7 @@ func walk(basedir, dir string, done chan struct{}, jobs chan<- Job, res chan<- R debug.RunHook("pipe.walk2", filepath.Join(relpath, name)) if isDir(fi) { - err = walk(basedir, subpath, done, jobs, ch) + err = walk(basedir, subpath, selectFunc, done, jobs, ch) if err != nil { return err } @@ -156,7 +170,7 @@ func walk(basedir, dir string, done chan struct{}, jobs chan<- Job, res chan<- R // Walk sends a Job for each file and directory it finds below the paths. When // the channel done is closed, processing stops. -func Walk(paths []string, done chan struct{}, jobs chan<- Job, res chan<- Result) error { +func Walk(paths []string, selectFunc SelectFunc, done chan struct{}, jobs chan<- Job, res chan<- Result) error { defer func() { debug.Log("pipe.Walk", "output channel closed") close(jobs) @@ -166,7 +180,7 @@ func Walk(paths []string, done chan struct{}, jobs chan<- Job, res chan<- Result for _, path := range paths { debug.Log("pipe.Walk", "start walker for %v", path) ch := make(chan Result, 1) - err := walk(filepath.Dir(path), path, done, jobs, ch) + err := walk(filepath.Dir(path), path, selectFunc, done, jobs, ch) if err != nil { debug.Log("pipe.Walk", "error for %v: %v", path, err) continue diff --git a/pipe/pipe_test.go b/pipe/pipe_test.go index 42ff7c31d..001015938 100644 --- a/pipe/pipe_test.go +++ b/pipe/pipe_test.go @@ -19,6 +19,10 @@ type stats struct { dirs, files int } +func acceptAll(string, os.FileInfo) bool { + return true +} + func statPath(path string) (stats, error) { var s stats @@ -118,7 +122,7 @@ func TestPipelineWalkerWithSplit(t *testing.T) { }() resCh := make(chan pipe.Result, 1) - err = pipe.Walk([]string{TestWalkerPath}, done, jobs, resCh) + err = pipe.Walk([]string{TestWalkerPath}, acceptAll, done, jobs, resCh) OK(t, err) // wait for all workers to terminate @@ -198,7 +202,7 @@ func TestPipelineWalker(t *testing.T) { } resCh := make(chan pipe.Result, 1) - err = pipe.Walk([]string{TestWalkerPath}, done, jobs, resCh) + err = pipe.Walk([]string{TestWalkerPath}, acceptAll, done, jobs, resCh) OK(t, err) // wait for all workers to terminate @@ -298,7 +302,7 @@ func BenchmarkPipelineWalker(b *testing.B) { }() resCh := make(chan pipe.Result, 1) - err := pipe.Walk([]string{TestWalkerPath}, done, jobs, resCh) + err := pipe.Walk([]string{TestWalkerPath}, acceptAll, done, jobs, resCh) OK(b, err) // wait for all workers to terminate @@ -375,7 +379,7 @@ func TestPipelineWalkerMultiple(t *testing.T) { } resCh := make(chan pipe.Result, 1) - err = pipe.Walk(paths, done, jobs, resCh) + err = pipe.Walk(paths, acceptAll, done, jobs, resCh) OK(t, err) // wait for all workers to terminate diff --git a/walk_test.go b/walk_test.go index 397655978..4e0f8b930 100644 --- a/walk_test.go +++ b/walk_test.go @@ -1,6 +1,7 @@ package restic_test import ( + "os" "path/filepath" "testing" @@ -33,7 +34,11 @@ func TestWalkTree(t *testing.T) { // start filesystem walker fsJobs := make(chan pipe.Job) resCh := make(chan pipe.Result, 1) - go pipe.Walk(dirs, done, fsJobs, resCh) + + f := func(string, os.FileInfo) bool { + return true + } + go pipe.Walk(dirs, f, done, fsJobs, resCh) for { // receive fs job