diff --git a/archiver.go b/archiver.go index 69b1d0b4a..1182a85c5 100644 --- a/archiver.go +++ b/archiver.go @@ -34,8 +34,8 @@ type Archiver struct { blobToken chan struct{} - Error func(dir string, fi os.FileInfo, err error) error - Filter func(item string, fi os.FileInfo) bool + Error func(dir string, fi os.FileInfo, err error) error + SelectFilter pipe.SelectFunc } // NewArchiver returns a new archiver. @@ -50,7 +50,7 @@ func NewArchiver(repo *repository.Repository) *Archiver { } arch.Error = archiverAbortOnAllErrors - arch.Filter = archiverAllowAllFiles + arch.SelectFilter = archiverAllowAllFiles return arch } @@ -577,7 +577,7 @@ func (arch *Archiver) Snapshot(p *Progress, paths []string, parentID backend.ID) pipeCh := make(chan pipe.Job) resCh := make(chan pipe.Result, 1) go func() { - err := pipe.Walk(paths, done, pipeCh, resCh) + err := pipe.Walk(paths, arch.SelectFilter, done, pipeCh, resCh) if err != nil { debug.Log("Archiver.Snapshot", "pipe.Walk returned error %v", err) return @@ -659,7 +659,7 @@ func isRegularFile(fi os.FileInfo) bool { // Scan traverses the dirs to collect Stat information while emitting progress // information with p. -func Scan(dirs []string, p *Progress) (Stat, error) { +func Scan(dirs []string, filter pipe.SelectFunc, p *Progress) (Stat, error) { p.Start() defer p.Done() @@ -678,6 +678,15 @@ func Scan(dirs []string, p *Progress) (Stat, error) { fmt.Fprintf(os.Stderr, "error for %v: FileInfo is nil\n", str) return nil } + + if !filter(str, fi) { + debug.Log("Scan.Walk", "path %v excluded", str) + if fi.IsDir() { + return filepath.SkipDir + } + return nil + } + s := Stat{} if fi.IsDir() { s.Dirs++ diff --git a/cmd/restic/cmd_backup.go b/cmd/restic/cmd_backup.go index 99c91aef3..3e18c3402 100644 --- a/cmd/restic/cmd_backup.go +++ b/cmd/restic/cmd_backup.go @@ -10,13 +10,15 @@ import ( "github.com/restic/restic" "github.com/restic/restic/backend" + "github.com/restic/restic/filter" "github.com/restic/restic/repository" "golang.org/x/crypto/ssh/terminal" ) type CmdBackup struct { - Parent string `short:"p" long:"parent" description:"use this parent snapshot (default: last snapshot in repo that has the same target)"` - Force bool `short:"f" long:"force" description:"Force re-reading the target. Overrides the \"parent\" flag"` + Parent string `short:"p" long:"parent" description:"use this parent snapshot (default: last snapshot in repo that has the same target)"` + Force bool `short:"f" long:"force" description:"Force re-reading the target. Overrides the \"parent\" flag"` + Exclude []string `short:"e" long:"exclude" description:"Exclude a pattern (can be specified multiple times)"` global *GlobalOptions } @@ -282,14 +284,22 @@ func (cmd CmdBackup) Execute(args []string) error { cmd.global.Verbosef("scan %v\n", target) - stat, err := restic.Scan(target, cmd.newScanProgress()) + selectFilter := func(item string, fi os.FileInfo) bool { + matched, err := filter.List(cmd.Exclude, item) + if err != nil { + cmd.global.Warnf("error for exclude pattern: %v", err) + } - // TODO: add filter - // arch.Filter = func(dir string, fi os.FileInfo) bool { - // return true - // } + return !matched + } + + stat, err := restic.Scan(target, selectFilter, cmd.newScanProgress()) + if err != nil { + return err + } arch := restic.NewArchiver(repo) + arch.SelectFilter = selectFilter arch.Error = func(dir string, fi os.FileInfo, err error) error { // TODO: make ignoring errors configurable diff --git a/cmd/restic/cmd_ls.go b/cmd/restic/cmd_ls.go index 8ec904bbd..91c9507e3 100644 --- a/cmd/restic/cmd_ls.go +++ b/cmd/restic/cmd_ls.go @@ -11,6 +11,8 @@ import ( ) type CmdLs struct { + Long bool `short:"l" long:"long" description:"Use a long listing format showing size and mode"` + global *GlobalOptions } @@ -24,7 +26,11 @@ func init() { } } -func printNode(prefix string, n *restic.Node) string { +func (cmd CmdLs) printNode(prefix string, n *restic.Node) string { + if !cmd.Long { + return filepath.Join(prefix, n.Name) + } + switch n.Type { case "file": return fmt.Sprintf("%s %5d %5d %6d %s %s", @@ -40,17 +46,17 @@ func printNode(prefix string, n *restic.Node) string { } } -func printTree(prefix string, repo *repository.Repository, id backend.ID) error { +func (cmd CmdLs) printTree(prefix string, repo *repository.Repository, id backend.ID) error { tree, err := restic.LoadTree(repo, id) if err != nil { return err } for _, entry := range tree.Nodes { - fmt.Println(printNode(prefix, entry)) + cmd.global.Printf(cmd.printNode(prefix, entry) + "\n") if entry.Type == "dir" && entry.Subtree != nil { - err = printTree(filepath.Join(prefix, entry.Name), repo, entry.Subtree) + err = cmd.printTree(filepath.Join(prefix, entry.Name), repo, entry.Subtree) if err != nil { return err } @@ -89,7 +95,7 @@ func (cmd CmdLs) Execute(args []string) error { return err } - fmt.Printf("snapshot of %v at %s:\n", sn.Paths, sn.Time) + cmd.global.Verbosef("snapshot of %v at %s:\n", sn.Paths, sn.Time) - return printTree("", repo, sn.Tree) + return cmd.printTree("", repo, sn.Tree) } diff --git a/cmd/restic/cmd_restore.go b/cmd/restic/cmd_restore.go index e4d939fd2..cb74f4768 100644 --- a/cmd/restic/cmd_restore.go +++ b/cmd/restic/cmd_restore.go @@ -1,14 +1,19 @@ package main import ( + "errors" "fmt" - "path/filepath" "github.com/restic/restic" "github.com/restic/restic/debug" + "github.com/restic/restic/filter" ) type CmdRestore struct { + Exclude []string `short:"e" long:"exclude" description:"Exclude a pattern (can be specified multiple times)"` + Include []string `short:"i" long:"include" description:"Include a pattern, exclude everything else (can be specified multiple times)"` + Target string `short:"t" long:"target" description:"Directory to restore to"` + global *GlobalOptions } @@ -23,14 +28,26 @@ func init() { } func (cmd CmdRestore) Usage() string { - return "snapshot-ID TARGETDIR [PATTERN]" + return "snapshot-ID" } func (cmd CmdRestore) Execute(args []string) error { - if len(args) < 2 || len(args) > 3 { + if len(args) != 1 { return fmt.Errorf("wrong number of arguments, Usage: %s", cmd.Usage()) } + if cmd.Target == "" { + return errors.New("please specify a directory to restore to (--target)") + } + + if len(cmd.Exclude) > 0 && len(cmd.Include) > 0 { + return errors.New("exclude and include patterns are mutually exclusive") + } + + snapshotIDString := args[0] + + debug.Log("restore", "restore %v to %v", snapshotIDString, cmd.Target) + repo, err := cmd.global.OpenRepository() if err != nil { return err @@ -47,14 +64,11 @@ func (cmd CmdRestore) Execute(args []string) error { return err } - id, err := restic.FindSnapshot(repo, args[0]) + id, err := restic.FindSnapshot(repo, snapshotIDString) if err != nil { - cmd.global.Exitf(1, "invalid id %q: %v", args[0], err) + cmd.global.Exitf(1, "invalid id %q: %v", snapshotIDString, err) } - target := args[1] - - // create restorer res, err := restic.NewRestorer(repo, id) if err != nil { cmd.global.Exitf(2, "creating restorer failed: %v\n", err) @@ -62,41 +76,36 @@ func (cmd CmdRestore) Execute(args []string) error { res.Error = func(dir string, node *restic.Node, err error) error { cmd.global.Warnf("error for %s: %+v\n", dir, err) - - // if node.Type == "dir" { - // if e, ok := err.(*os.PathError); ok { - // if errn, ok := e.Err.(syscall.Errno); ok { - // if errn == syscall.EEXIST { - // fmt.Printf("ignoring already existing directory %s\n", dir) - // return nil - // } - // } - // } - // } return err } - // TODO: a filter against the full path sucks as filepath.Match doesn't match - // directory separators on '*'. still, it's better than nothing. - if len(args) > 2 { - pattern := args[2] - cmd.global.Verbosef("filter pattern %q\n", pattern) - - res.SelectForRestore = func(item string, dstpath string, node *restic.Node) bool { - matched, err := filepath.Match(pattern, node.Name) - if err != nil { - panic(err) - } - if !matched { - debug.Log("restic.restore", "item %v doesn't match pattern %q", item, pattern) - } - return matched + selectExcludeFilter := func(item string, dstpath string, node *restic.Node) bool { + matched, err := filter.List(cmd.Exclude, item) + if err != nil { + cmd.global.Warnf("error for exclude pattern: %v", err) } + + return !matched } - cmd.global.Verbosef("restoring %s to %s\n", res.Snapshot(), target) + selectIncludeFilter := func(item string, dstpath string, node *restic.Node) bool { + matched, err := filter.List(cmd.Include, item) + if err != nil { + cmd.global.Warnf("error for include pattern: %v", err) + } - err = res.RestoreTo(target) + return matched + } + + if len(cmd.Exclude) > 0 { + res.SelectFilter = selectExcludeFilter + } else if len(cmd.Include) > 0 { + res.SelectFilter = selectIncludeFilter + } + + cmd.global.Verbosef("restoring %s to %s\n", res.Snapshot(), cmd.Target) + + err = res.RestoreTo(cmd.Target) if err != nil { return err } diff --git a/cmd/restic/integration_test.go b/cmd/restic/integration_test.go index b06b85745..93c98fa60 100644 --- a/cmd/restic/integration_test.go +++ b/cmd/restic/integration_test.go @@ -10,12 +10,14 @@ import ( "os" "path/filepath" "regexp" + "strings" "syscall" "testing" "time" "github.com/restic/restic/backend" "github.com/restic/restic/debug" + "github.com/restic/restic/filter" . "github.com/restic/restic/test" ) @@ -44,7 +46,11 @@ func cmdInit(t testing.TB, global GlobalOptions) { } func cmdBackup(t testing.TB, global GlobalOptions, target []string, parentID backend.ID) { - cmd := &CmdBackup{global: &global} + cmdBackupExcludes(t, global, target, parentID, nil) +} + +func cmdBackupExcludes(t testing.TB, global GlobalOptions, target []string, parentID backend.ID, excludes []string) { + cmd := &CmdBackup{global: &global, Exclude: excludes} cmd.Parent = parentID.String() t.Logf("backing up %v", target) @@ -63,9 +69,18 @@ func cmdList(t testing.TB, global GlobalOptions, tpe string) []backend.ID { return IDs } -func cmdRestore(t testing.TB, global GlobalOptions, dir string, snapshotID backend.ID, args ...string) { - cmd := &CmdRestore{global: &global} - cmd.Execute(append([]string{snapshotID.String(), dir}, args...)) +func cmdRestore(t testing.TB, global GlobalOptions, dir string, snapshotID backend.ID) { + cmdRestoreExcludes(t, global, dir, snapshotID, nil) +} + +func cmdRestoreExcludes(t testing.TB, global GlobalOptions, dir string, snapshotID backend.ID, excludes []string) { + cmd := &CmdRestore{global: &global, Target: dir, Exclude: excludes} + OK(t, cmd.Execute([]string{snapshotID.String()})) +} + +func cmdRestoreIncludes(t testing.TB, global GlobalOptions, dir string, snapshotID backend.ID, includes []string) { + cmd := &CmdRestore{global: &global, Target: dir, Include: includes} + OK(t, cmd.Execute([]string{snapshotID.String()})) } func cmdCheck(t testing.TB, global GlobalOptions) { @@ -73,6 +88,16 @@ func cmdCheck(t testing.TB, global GlobalOptions) { OK(t, cmd.Execute(nil)) } +func cmdLs(t testing.TB, global GlobalOptions, snapshotID string) []string { + var buf bytes.Buffer + global.stdout = &buf + + cmd := &CmdLs{global: &global} + OK(t, cmd.Execute([]string{snapshotID})) + + return strings.Split(string(buf.Bytes()), "\n") +} + func TestBackup(t *testing.T) { withTestEnvironment(t, func(env *testEnvironment, global GlobalOptions) { datafile := filepath.Join("testdata", "backup-data.tar.gz") @@ -237,6 +262,86 @@ func TestBackupMissingFile2(t *testing.T) { }) } +func includes(haystack []string, needle string) bool { + for _, s := range haystack { + if s == needle { + return true + } + } + + return false +} + +func loadSnapshotMap(t testing.TB, global GlobalOptions) map[string]struct{} { + snapshotIDs := cmdList(t, global, "snapshots") + + m := make(map[string]struct{}) + for _, id := range snapshotIDs { + m[id.String()] = struct{}{} + } + + return m +} + +func lastSnapshot(old, new map[string]struct{}) (map[string]struct{}, string) { + for k := range new { + if _, ok := old[k]; !ok { + old[k] = struct{}{} + return old, k + } + } + + return old, "" +} + +var backupExcludeFilenames = []string{ + "testfile1", + "foo.tar.gz", + "private/secret/passwords.txt", + "work/source/test.c", +} + +func TestBackupExclude(t *testing.T) { + withTestEnvironment(t, func(env *testEnvironment, global GlobalOptions) { + cmdInit(t, global) + + datadir := filepath.Join(env.base, "testdata") + + for _, filename := range backupExcludeFilenames { + fp := filepath.Join(datadir, filename) + OK(t, os.MkdirAll(filepath.Dir(fp), 0755)) + + f, err := os.Create(fp) + OK(t, err) + + fmt.Fprintf(f, filename) + OK(t, f.Close()) + } + + snapshots := make(map[string]struct{}) + + cmdBackup(t, global, []string{datadir}, nil) + snapshots, snapshotID := lastSnapshot(snapshots, loadSnapshotMap(t, global)) + files := cmdLs(t, global, snapshotID) + Assert(t, includes(files, filepath.Join("testdata", "foo.tar.gz")), + "expected file %q in first snapshot, but it's not included", "foo.tar.gz") + + cmdBackupExcludes(t, global, []string{datadir}, nil, []string{"*.tar.gz"}) + snapshots, snapshotID = lastSnapshot(snapshots, loadSnapshotMap(t, global)) + files = cmdLs(t, global, snapshotID) + Assert(t, !includes(files, filepath.Join("testdata", "foo.tar.gz")), + "expected file %q not in first snapshot, but it's included", "foo.tar.gz") + + cmdBackupExcludes(t, global, []string{datadir}, nil, []string{"*.tar.gz", "private/secret"}) + snapshots, snapshotID = lastSnapshot(snapshots, loadSnapshotMap(t, global)) + files = cmdLs(t, global, snapshotID) + Assert(t, !includes(files, filepath.Join("testdata", "foo.tar.gz")), + "expected file %q not in first snapshot, but it's included", "foo.tar.gz") + Assert(t, !includes(files, filepath.Join("testdata", "private", "secret", "passwords.txt")), + "expected file %q not in first snapshot, but it's included", "passwords.txt") + }) +} + const ( incrementalFirstWrite = 20 * 1042 * 1024 incrementalSecondWrite = 12 * 1042 * 1024 @@ -422,10 +527,10 @@ func TestRestoreFilter(t *testing.T) { for i, pat := range []string{"*.c", "*.exe", "*", "*file3*"} { base := filepath.Join(env.base, fmt.Sprintf("restore%d", i+1)) - cmdRestore(t, global, base, snapshotID, pat) + cmdRestoreExcludes(t, global, base, snapshotID, []string{pat}) for _, test := range testfiles { err := testFileSize(filepath.Join(base, "testdata", test.name), int64(test.size)) - if ok, _ := filepath.Match(pat, filepath.Base(test.name)); ok { + if ok, _ := filter.Match(pat, filepath.Base(test.name)); !ok { OK(t, err) } else { Assert(t, os.IsNotExist(err), @@ -463,7 +568,7 @@ func TestRestoreNoMetadataOnIgnoredIntermediateDirs(t *testing.T) { // restore with filter "*.ext", this should restore "file.ext", but // since the directories are ignored and only created because of // "file.ext", no meta data should be restored for them. - cmdRestore(t, global, filepath.Join(env.base, "restore0"), snapshotID, "*.ext") + cmdRestoreIncludes(t, global, filepath.Join(env.base, "restore0"), snapshotID, []string{"*.ext"}) f1 := filepath.Join(env.base, "restore0", "testdata", "subdir1", "subdir2") fi, err := os.Stat(f1) @@ -473,7 +578,7 @@ func TestRestoreNoMetadataOnIgnoredIntermediateDirs(t *testing.T) { "meta data of intermediate directory has been restore although it was ignored") // restore with filter "*", this should restore meta data on everything. - cmdRestore(t, global, filepath.Join(env.base, "restore1"), snapshotID, "*") + cmdRestoreIncludes(t, global, filepath.Join(env.base, "restore1"), snapshotID, []string{"*"}) f2 := filepath.Join(env.base, "restore1", "testdata", "subdir1", "subdir2") fi, err = os.Stat(f2) diff --git a/filter/doc.go b/filter/doc.go new file mode 100644 index 000000000..4e0ba0f4d --- /dev/null +++ b/filter/doc.go @@ -0,0 +1,5 @@ +// Package filter implements filters for files similar to filepath.Glob, but +// in contrast to filepath.Glob a pattern may specify directories. +// +// For a list of valid patterns please see the documentation on filepath.Glob. +package filter diff --git a/filter/filter.go b/filter/filter.go new file mode 100644 index 000000000..f8c335e34 --- /dev/null +++ b/filter/filter.go @@ -0,0 +1,108 @@ +package filter + +import ( + "errors" + "path/filepath" + "strings" +) + +// ErrBadString is returned when Match is called with the empty string as the +// second argument. +var ErrBadString = errors.New("filter.Match: string is empty") + +// Match returns true if str matches the pattern. When the pattern is +// malformed, filepath.ErrBadPattern is returned. The empty pattern matches +// everything, when str is the empty string ErrBadString is returned. +// +// Pattern can be a combination of patterns suitable for filepath.Match, joined +// by filepath.Separator. +func Match(pattern, str string) (matched bool, err error) { + if pattern == "" { + return true, nil + } + + if str == "" { + return false, ErrBadString + } + + patterns := strings.Split(pattern, string(filepath.Separator)) + strs := strings.Split(str, string(filepath.Separator)) + + return match(patterns, strs) +} + +func hasDoubleWildcard(list []string) (ok bool, pos int) { + for i, item := range list { + if item == "**" { + return true, i + } + } + + return false, 0 +} + +func match(patterns, strs []string) (matched bool, err error) { + if ok, pos := hasDoubleWildcard(patterns); ok { + // gradually expand '**' into separate wildcards + for i := 0; i <= len(strs)-len(patterns)+1; i++ { + newPat := make([]string, pos) + copy(newPat, patterns[:pos]) + for k := 0; k < i; k++ { + newPat = append(newPat, "*") + } + newPat = append(newPat, patterns[pos+1:]...) + + matched, err := match(newPat, strs) + if err != nil { + return false, err + } + + if matched { + return true, nil + } + } + + return false, nil + } + + if len(patterns) == 0 && len(strs) == 0 { + return true, nil + } + + if len(patterns) <= len(strs) { + outer: + for offset := len(strs) - len(patterns); offset >= 0; offset-- { + + for i := len(patterns) - 1; i >= 0; i-- { + ok, err := filepath.Match(patterns[i], strs[offset+i]) + if err != nil { + return false, err + } + + if !ok { + continue outer + } + } + + return true, nil + } + } + + return false, nil +} + +// List returns true if str matches one of the patterns. +func List(patterns []string, str string) (matched bool, err error) { + for _, pat := range patterns { + matched, err = Match(pat, str) + if err != nil { + return false, err + } + + if matched { + return true, nil + } + } + + return false, nil +} diff --git a/filter/filter_test.go b/filter/filter_test.go new file mode 100644 index 000000000..78e731b68 --- /dev/null +++ b/filter/filter_test.go @@ -0,0 +1,252 @@ +package filter_test + +import ( + "bufio" + "compress/bzip2" + "fmt" + "os" + "testing" + + "github.com/restic/restic/filter" +) + +var matchTests = []struct { + pattern string + path string + match bool +}{ + {"", "", true}, + {"", "foo", true}, + {"", "/x/y/z/foo", true}, + {"*.go", "/foo/bar/test.go", true}, + {"*.c", "/foo/bar/test.go", false}, + {"*", "/foo/bar/test.go", true}, + {"foo*", "/foo/bar/test.go", true}, + {"bar*", "/foo/bar/test.go", true}, + {"/bar*", "/foo/bar/test.go", false}, + {"bar/*", "/foo/bar/test.go", true}, + {"baz/*", "/foo/bar/test.go", false}, + {"bar/test.go", "/foo/bar/test.go", true}, + {"bar/*.go", "/foo/bar/test.go", true}, + {"ba*/*.go", "/foo/bar/test.go", true}, + {"bb*/*.go", "/foo/bar/test.go", false}, + {"test.*", "/foo/bar/test.go", true}, + {"tesT.*", "/foo/bar/test.go", false}, + {"bar/*", "/foo/bar/baz", true}, + {"bar", "/foo/bar", true}, + {"bar", "/foo/bar/baz", true}, + {"bar", "/foo/bar/test.go", true}, + {"/foo/*test.*", "/foo/bar/test.go", false}, + {"/foo/*/test.*", "/foo/bar/test.go", true}, + {"/foo/*/bar/test.*", "/foo/bar/test.go", false}, + {"/*/*/bar/test.*", "/foo/bar/test.go", false}, + {"/*/*/bar/test.*", "/foo/bar/baz/test.go", false}, + {"/*/*/baz/test.*", "/foo/bar/baz/test.go", true}, + {"/*/foo/bar/test.*", "/foo/bar/baz/test.go", false}, + {"/*/foo/bar/test.*", "/foo/bar/baz/test.go", false}, + {"/foo/bar/test.*", "bar/baz/test.go", false}, + {"/x/y/bar/baz/test.*", "bar/baz/test.go", false}, + {"/x/y/bar/baz/test.c", "bar/baz/test.go", false}, + {"baz/test.*", "bar/baz/test.go", true}, + {"baz/tesT.*", "bar/baz/test.go", false}, + {"test.go", "bar/baz/test.go", true}, + {"*.go", "bar/baz/test.go", true}, + {"*.c", "bar/baz/test.go", false}, + {"sdk", "/foo/bar/sdk", true}, + {"sdk", "/foo/bar/sdk/test/sdk_foo.go", true}, + { + "sdk/*/cpp/*/*vars*.html", + "/usr/share/doc/libreoffice/sdk/docs/cpp/ref/a00517.html", + false, + }, + {"foo/**/bar/*.go", "/home/user/foo/work/special/project/bar/test.go", true}, + {"foo/**/bar/*.go", "/home/user/foo/bar/test.go", true}, + {"foo/**/bar/*.go", "x/foo/bar/test.go", true}, + {"foo/**/bar/*.go", "foo/bar/test.go", true}, + {"foo/**/bar/*.go", "/home/user/foo/test.c", false}, + {"foo/**/bar/*.go", "bar/foo/main.go", false}, + {"foo/**/bar/*.go", "/foo/bar/main.go", true}, + {"foo/**/bar/*.go", "bar/main.go", false}, + {"foo/**/bar", "/home/user/foo/x/y/bar", true}, + {"foo/**/bar", "/home/user/foo/x/y/bar/main.go", true}, + {"user/**/important*", "/home/user/work/x/y/hidden/x", false}, + {"user/**/hidden*/**/c", "/home/user/work/x/y/hidden/z/a/b/c", true}, +} + +func TestMatch(t *testing.T) { + for i, test := range matchTests { + match, err := filter.Match(test.pattern, test.path) + if err != nil { + t.Errorf("test %d failed: expected no error for pattern %q, but error returned: %v", + i, test.pattern, err) + continue + } + + if match != test.match { + t.Errorf("test %d: filter.Match(%q, %q): expected %v, got %v", + i, test.pattern, test.path, test.match, match) + } + } +} + +func ExampleMatch() { + match, _ := filter.Match("*.go", "/home/user/file.go") + fmt.Printf("match: %v\n", match) + // Output: + // match: true +} + +func ExampleMatch_wildcards() { + match, _ := filter.Match("/home/[uU]ser/?.go", "/home/user/F.go") + fmt.Printf("match: %v\n", match) + // Output: + // match: true +} + +var filterListTests = []struct { + patterns []string + path string + match bool +}{ + {[]string{"*.go"}, "/foo/bar/test.go", true}, + {[]string{"*.c"}, "/foo/bar/test.go", false}, + {[]string{"*.go", "*.c"}, "/foo/bar/test.go", true}, + {[]string{"*"}, "/foo/bar/test.go", true}, + {[]string{"x"}, "/foo/bar/test.go", false}, + {[]string{"?"}, "/foo/bar/test.go", false}, + {[]string{"?", "x"}, "/foo/bar/x", true}, + {[]string{"/*/*/bar/test.*"}, "/foo/bar/test.go", false}, + {[]string{"/*/*/bar/test.*", "*.go"}, "/foo/bar/test.go", true}, +} + +func TestMatchList(t *testing.T) { + for i, test := range filterListTests { + match, err := filter.List(test.patterns, test.path) + if err != nil { + t.Errorf("test %d failed: expected no error for patterns %q, but error returned: %v", + i, test.patterns, err) + continue + } + + if match != test.match { + t.Errorf("test %d: filter.MatchList(%q, %q): expected %v, got %v", + i, test.patterns, test.path, test.match, match) + } + } +} + +func ExampleMatchList() { + match, _ := filter.List([]string{"*.c", "*.go"}, "/home/user/file.go") + fmt.Printf("match: %v\n", match) + // Output: + // match: true +} + +func extractTestLines(t testing.TB) (lines []string) { + f, err := os.Open("testdata/libreoffice.txt.bz2") + if err != nil { + t.Fatal(err) + } + + defer func() { + if err := f.Close(); err != nil { + t.Fatal(err) + } + }() + + sc := bufio.NewScanner(bzip2.NewReader(f)) + for sc.Scan() { + lines = append(lines, sc.Text()) + } + + return lines +} + +func TestFilterPatternsFile(t *testing.T) { + lines := extractTestLines(t) + + var testPatterns = []struct { + pattern string + hits uint + }{ + {"*.html", 18249}, + {"sdk", 22186}, + {"sdk/*/cpp/*/*vars.html", 3}, + } + + for _, test := range testPatterns { + var c uint + for _, line := range lines { + match, err := filter.Match(test.pattern, line) + if err != nil { + t.Error(err) + continue + } + + if match { + c++ + // fmt.Printf("pattern %q, line %q\n", test.pattern, line) + } + } + + if c != test.hits { + t.Errorf("wrong number of hits for pattern %q: want %d, got %d", + test.pattern, test.hits, c) + } + } +} + +func BenchmarkFilterLines(b *testing.B) { + pattern := "sdk/*/cpp/*/*vars.html" + lines := extractTestLines(b) + var c uint + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + c = 0 + for _, line := range lines { + match, err := filter.Match(pattern, line) + if err != nil { + b.Fatal(err) + } + + if match { + c++ + } + } + + if c != 3 { + b.Fatalf("wrong number of matches: expected 3, got %d", c) + } + } +} + +func BenchmarkFilterPatterns(b *testing.B) { + patterns := []string{ + "sdk/*", + "*.html", + } + lines := extractTestLines(b) + var c uint + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + c = 0 + for _, line := range lines { + match, err := filter.List(patterns, line) + if err != nil { + b.Fatal(err) + } + + if match { + c++ + } + } + + if c != 22185 { + b.Fatalf("wrong number of matches: expected 22185, got %d", c) + } + } +} diff --git a/filter/testdata/libreoffice.txt.bz2 b/filter/testdata/libreoffice.txt.bz2 new file mode 100644 index 000000000..adc90f2e2 Binary files /dev/null and b/filter/testdata/libreoffice.txt.bz2 differ diff --git a/pipe/pipe.go b/pipe/pipe.go index a419f082d..4e9908315 100644 --- a/pipe/pipe.go +++ b/pipe/pipe.go @@ -82,13 +82,22 @@ func isFile(fi os.FileInfo) bool { var errCancelled = errors.New("walk cancelled") -func walk(basedir, dir string, done chan struct{}, jobs chan<- Job, res chan<- Result) error { +// SelectFunc returns true for all items that should be included (files and +// dirs). If false is returned, files are ignored and dirs are not even walked. +type SelectFunc func(item string, fi os.FileInfo) bool + +func walk(basedir, dir string, selectFunc SelectFunc, done chan struct{}, jobs chan<- Job, res chan<- Result) error { info, err := os.Lstat(dir) if err != nil { debug.Log("pipe.walk", "error for %v: %v", dir, err) return err } + if !selectFunc(dir, info) { + debug.Log("pipe.walk", "file %v excluded by filter", dir) + return nil + } + relpath, _ := filepath.Rel(basedir, dir) if !info.IsDir() { @@ -114,13 +123,18 @@ func walk(basedir, dir string, done chan struct{}, jobs chan<- Job, res chan<- R for _, name := range names { subpath := filepath.Join(dir, name) + fi, statErr := os.Lstat(subpath) + if !selectFunc(subpath, fi) { + debug.Log("pipe.walk", "file %v excluded by filter", subpath) + continue + } + ch := make(chan Result, 1) entries = append(entries, ch) - fi, err := os.Lstat(subpath) - if err != nil { + if statErr != nil { select { - case jobs <- Entry{info: fi, error: err, basedir: basedir, path: filepath.Join(relpath, name), result: ch}: + case jobs <- Entry{info: fi, error: statErr, basedir: basedir, path: filepath.Join(relpath, name), result: ch}: case <-done: return errCancelled } @@ -132,7 +146,7 @@ func walk(basedir, dir string, done chan struct{}, jobs chan<- Job, res chan<- R debug.RunHook("pipe.walk2", filepath.Join(relpath, name)) if isDir(fi) { - err = walk(basedir, subpath, done, jobs, ch) + err = walk(basedir, subpath, selectFunc, done, jobs, ch) if err != nil { return err } @@ -156,7 +170,7 @@ func walk(basedir, dir string, done chan struct{}, jobs chan<- Job, res chan<- R // Walk sends a Job for each file and directory it finds below the paths. When // the channel done is closed, processing stops. -func Walk(paths []string, done chan struct{}, jobs chan<- Job, res chan<- Result) error { +func Walk(paths []string, selectFunc SelectFunc, done chan struct{}, jobs chan<- Job, res chan<- Result) error { defer func() { debug.Log("pipe.Walk", "output channel closed") close(jobs) @@ -166,7 +180,7 @@ func Walk(paths []string, done chan struct{}, jobs chan<- Job, res chan<- Result for _, path := range paths { debug.Log("pipe.Walk", "start walker for %v", path) ch := make(chan Result, 1) - err := walk(filepath.Dir(path), path, done, jobs, ch) + err := walk(filepath.Dir(path), path, selectFunc, done, jobs, ch) if err != nil { debug.Log("pipe.Walk", "error for %v: %v", path, err) continue diff --git a/pipe/pipe_test.go b/pipe/pipe_test.go index 42ff7c31d..001015938 100644 --- a/pipe/pipe_test.go +++ b/pipe/pipe_test.go @@ -19,6 +19,10 @@ type stats struct { dirs, files int } +func acceptAll(string, os.FileInfo) bool { + return true +} + func statPath(path string) (stats, error) { var s stats @@ -118,7 +122,7 @@ func TestPipelineWalkerWithSplit(t *testing.T) { }() resCh := make(chan pipe.Result, 1) - err = pipe.Walk([]string{TestWalkerPath}, done, jobs, resCh) + err = pipe.Walk([]string{TestWalkerPath}, acceptAll, done, jobs, resCh) OK(t, err) // wait for all workers to terminate @@ -198,7 +202,7 @@ func TestPipelineWalker(t *testing.T) { } resCh := make(chan pipe.Result, 1) - err = pipe.Walk([]string{TestWalkerPath}, done, jobs, resCh) + err = pipe.Walk([]string{TestWalkerPath}, acceptAll, done, jobs, resCh) OK(t, err) // wait for all workers to terminate @@ -298,7 +302,7 @@ func BenchmarkPipelineWalker(b *testing.B) { }() resCh := make(chan pipe.Result, 1) - err := pipe.Walk([]string{TestWalkerPath}, done, jobs, resCh) + err := pipe.Walk([]string{TestWalkerPath}, acceptAll, done, jobs, resCh) OK(b, err) // wait for all workers to terminate @@ -375,7 +379,7 @@ func TestPipelineWalkerMultiple(t *testing.T) { } resCh := make(chan pipe.Result, 1) - err = pipe.Walk(paths, done, jobs, resCh) + err = pipe.Walk(paths, acceptAll, done, jobs, resCh) OK(t, err) // wait for all workers to terminate diff --git a/restorer.go b/restorer.go index 25bb1e3cd..e3e7292ab 100644 --- a/restorer.go +++ b/restorer.go @@ -18,8 +18,8 @@ type Restorer struct { repo *repository.Repository sn *Snapshot - Error func(dir string, node *Node, err error) error - SelectForRestore func(item string, dstpath string, node *Node) bool + Error func(dir string, node *Node, err error) error + SelectFilter func(item string, dstpath string, node *Node) bool } var restorerAbortOnAllErrors = func(str string, node *Node, err error) error { return err } @@ -28,7 +28,7 @@ var restorerAbortOnAllErrors = func(str string, node *Node, err error) error { r func NewRestorer(repo *repository.Repository, id backend.ID) (*Restorer, error) { r := &Restorer{ repo: repo, Error: restorerAbortOnAllErrors, - SelectForRestore: func(string, string, *Node) bool { return true }, + SelectFilter: func(string, string, *Node) bool { return true }, } var err error @@ -48,7 +48,7 @@ func (res *Restorer) restoreTo(dst string, dir string, treeID backend.ID) error } for _, node := range tree.Nodes { - selectedForRestore := res.SelectForRestore(filepath.Join(dir, node.Name), + selectedForRestore := res.SelectFilter(filepath.Join(dir, node.Name), filepath.Join(dst, dir, node.Name), node) debug.Log("Restorer.restoreNodeTo", "SelectForRestore returned %v", selectedForRestore) diff --git a/walk_test.go b/walk_test.go index 397655978..4e0f8b930 100644 --- a/walk_test.go +++ b/walk_test.go @@ -1,6 +1,7 @@ package restic_test import ( + "os" "path/filepath" "testing" @@ -33,7 +34,11 @@ func TestWalkTree(t *testing.T) { // start filesystem walker fsJobs := make(chan pipe.Job) resCh := make(chan pipe.Result, 1) - go pipe.Walk(dirs, done, fsJobs, resCh) + + f := func(string, os.FileInfo) bool { + return true + } + go pipe.Walk(dirs, f, done, fsJobs, resCh) for { // receive fs job