Merge pull request #231 from restic/implement-filter-functions

Add filter implementation for files
Alexander Neumann 2015-07-20 21:33:16 +02:00
commit ac8d01ac8c
13 changed files with 605 additions and 78 deletions

View File

@ -34,8 +34,8 @@ type Archiver struct {
blobToken chan struct{}
Error func(dir string, fi os.FileInfo, err error) error
Filter func(item string, fi os.FileInfo) bool
Error func(dir string, fi os.FileInfo, err error) error
SelectFilter pipe.SelectFunc
}
// NewArchiver returns a new archiver.
@ -50,7 +50,7 @@ func NewArchiver(repo *repository.Repository) *Archiver {
}
arch.Error = archiverAbortOnAllErrors
arch.Filter = archiverAllowAllFiles
arch.SelectFilter = archiverAllowAllFiles
return arch
}
@ -577,7 +577,7 @@ func (arch *Archiver) Snapshot(p *Progress, paths []string, parentID backend.ID)
pipeCh := make(chan pipe.Job)
resCh := make(chan pipe.Result, 1)
go func() {
err := pipe.Walk(paths, done, pipeCh, resCh)
err := pipe.Walk(paths, arch.SelectFilter, done, pipeCh, resCh)
if err != nil {
debug.Log("Archiver.Snapshot", "pipe.Walk returned error %v", err)
return
@ -659,7 +659,7 @@ func isRegularFile(fi os.FileInfo) bool {
// Scan traverses the dirs to collect Stat information while emitting progress
// information with p.
func Scan(dirs []string, p *Progress) (Stat, error) {
func Scan(dirs []string, filter pipe.SelectFunc, p *Progress) (Stat, error) {
p.Start()
defer p.Done()
@ -678,6 +678,15 @@ func Scan(dirs []string, p *Progress) (Stat, error) {
fmt.Fprintf(os.Stderr, "error for %v: FileInfo is nil\n", str)
return nil
}
if !filter(str, fi) {
debug.Log("Scan.Walk", "path %v excluded", str)
if fi.IsDir() {
return filepath.SkipDir
}
return nil
}
s := Stat{}
if fi.IsDir() {
s.Dirs++
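
Taken together, the archiver hunks thread a single pipe.SelectFunc through both the scanner and the archive pipeline: Scan now prunes excluded directories with filepath.SkipDir, and Snapshot hands the same function to pipe.Walk. A rough sketch of how a caller wires this up, mirroring the cmd_backup.go hunk further down; the excludes slice, the nil progress value and the error output here are illustrative and not part of this commit:

package backupsketch

import (
	"fmt"
	"os"

	"github.com/restic/restic"
	"github.com/restic/restic/filter"
	"github.com/restic/restic/repository"
)

// backupWithExcludes sketches how one select function is shared by
// restic.Scan and the Archiver so that both agree on what is skipped.
func backupWithExcludes(repo *repository.Repository, targets, excludes []string) error {
	selectFilter := func(item string, fi os.FileInfo) bool {
		matched, err := filter.List(excludes, item)
		if err != nil {
			fmt.Fprintf(os.Stderr, "error for exclude pattern: %v\n", err)
		}
		return !matched // keep everything that did not match a pattern
	}

	// The scanner applies the filter too, so progress totals only count
	// items the archiver will actually visit.
	if _, err := restic.Scan(targets, selectFilter, nil); err != nil {
		return err
	}

	arch := restic.NewArchiver(repo)
	arch.SelectFilter = selectFilter
	// arch.Snapshot(...) then walks the same targets with the same filter.
	return nil
}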

View File

@ -10,13 +10,15 @@ import (
"github.com/restic/restic"
"github.com/restic/restic/backend"
"github.com/restic/restic/filter"
"github.com/restic/restic/repository"
"golang.org/x/crypto/ssh/terminal"
)
type CmdBackup struct {
Parent string `short:"p" long:"parent" description:"use this parent snapshot (default: last snapshot in repo that has the same target)"`
Force bool `short:"f" long:"force" description:"Force re-reading the target. Overrides the \"parent\" flag"`
Parent string `short:"p" long:"parent" description:"use this parent snapshot (default: last snapshot in repo that has the same target)"`
Force bool `short:"f" long:"force" description:"Force re-reading the target. Overrides the \"parent\" flag"`
Exclude []string `short:"e" long:"exclude" description:"Exclude a pattern (can be specified multiple times)"`
global *GlobalOptions
}
@ -282,14 +284,22 @@ func (cmd CmdBackup) Execute(args []string) error {
cmd.global.Verbosef("scan %v\n", target)
stat, err := restic.Scan(target, cmd.newScanProgress())
selectFilter := func(item string, fi os.FileInfo) bool {
matched, err := filter.List(cmd.Exclude, item)
if err != nil {
cmd.global.Warnf("error for exclude pattern: %v", err)
}
// TODO: add filter
// arch.Filter = func(dir string, fi os.FileInfo) bool {
// return true
// }
return !matched
}
stat, err := restic.Scan(target, selectFilter, cmd.newScanProgress())
if err != nil {
return err
}
arch := restic.NewArchiver(repo)
arch.SelectFilter = selectFilter
arch.Error = func(dir string, fi os.FileInfo, err error) error {
// TODO: make ignoring errors configurable

View File

@ -11,6 +11,8 @@ import (
)
type CmdLs struct {
Long bool `short:"l" long:"long" description:"Use a long listing format showing size and mode"`
global *GlobalOptions
}
@ -24,7 +26,11 @@ func init() {
}
}
func printNode(prefix string, n *restic.Node) string {
func (cmd CmdLs) printNode(prefix string, n *restic.Node) string {
if !cmd.Long {
return filepath.Join(prefix, n.Name)
}
switch n.Type {
case "file":
return fmt.Sprintf("%s %5d %5d %6d %s %s",
@ -40,17 +46,17 @@ func printNode(prefix string, n *restic.Node) string {
}
}
func printTree(prefix string, repo *repository.Repository, id backend.ID) error {
func (cmd CmdLs) printTree(prefix string, repo *repository.Repository, id backend.ID) error {
tree, err := restic.LoadTree(repo, id)
if err != nil {
return err
}
for _, entry := range tree.Nodes {
fmt.Println(printNode(prefix, entry))
cmd.global.Printf(cmd.printNode(prefix, entry) + "\n")
if entry.Type == "dir" && entry.Subtree != nil {
err = printTree(filepath.Join(prefix, entry.Name), repo, entry.Subtree)
err = cmd.printTree(filepath.Join(prefix, entry.Name), repo, entry.Subtree)
if err != nil {
return err
}
@ -89,7 +95,7 @@ func (cmd CmdLs) Execute(args []string) error {
return err
}
fmt.Printf("snapshot of %v at %s:\n", sn.Paths, sn.Time)
cmd.global.Verbosef("snapshot of %v at %s:\n", sn.Paths, sn.Time)
return printTree("", repo, sn.Tree)
return cmd.printTree("", repo, sn.Tree)
}

View File

@ -1,14 +1,19 @@
package main
import (
"errors"
"fmt"
"path/filepath"
"github.com/restic/restic"
"github.com/restic/restic/debug"
"github.com/restic/restic/filter"
)
type CmdRestore struct {
Exclude []string `short:"e" long:"exclude" description:"Exclude a pattern (can be specified multiple times)"`
Include []string `short:"i" long:"include" description:"Include a pattern, exclude everything else (can be specified multiple times)"`
Target string `short:"t" long:"target" description:"Directory to restore to"`
global *GlobalOptions
}
@ -23,14 +28,26 @@ func init() {
}
func (cmd CmdRestore) Usage() string {
return "snapshot-ID TARGETDIR [PATTERN]"
return "snapshot-ID"
}
func (cmd CmdRestore) Execute(args []string) error {
if len(args) < 2 || len(args) > 3 {
if len(args) != 1 {
return fmt.Errorf("wrong number of arguments, Usage: %s", cmd.Usage())
}
if cmd.Target == "" {
return errors.New("please specify a directory to restore to (--target)")
}
if len(cmd.Exclude) > 0 && len(cmd.Include) > 0 {
return errors.New("exclude and include patterns are mutually exclusive")
}
snapshotIDString := args[0]
debug.Log("restore", "restore %v to %v", snapshotIDString, cmd.Target)
repo, err := cmd.global.OpenRepository()
if err != nil {
return err
@ -47,14 +64,11 @@ func (cmd CmdRestore) Execute(args []string) error {
return err
}
id, err := restic.FindSnapshot(repo, args[0])
id, err := restic.FindSnapshot(repo, snapshotIDString)
if err != nil {
cmd.global.Exitf(1, "invalid id %q: %v", args[0], err)
cmd.global.Exitf(1, "invalid id %q: %v", snapshotIDString, err)
}
target := args[1]
// create restorer
res, err := restic.NewRestorer(repo, id)
if err != nil {
cmd.global.Exitf(2, "creating restorer failed: %v\n", err)
@ -62,41 +76,36 @@ func (cmd CmdRestore) Execute(args []string) error {
res.Error = func(dir string, node *restic.Node, err error) error {
cmd.global.Warnf("error for %s: %+v\n", dir, err)
// if node.Type == "dir" {
// if e, ok := err.(*os.PathError); ok {
// if errn, ok := e.Err.(syscall.Errno); ok {
// if errn == syscall.EEXIST {
// fmt.Printf("ignoring already existing directory %s\n", dir)
// return nil
// }
// }
// }
// }
return err
}
// TODO: a filter against the full path sucks as filepath.Match doesn't match
// directory separators on '*'. still, it's better than nothing.
if len(args) > 2 {
pattern := args[2]
cmd.global.Verbosef("filter pattern %q\n", pattern)
res.SelectForRestore = func(item string, dstpath string, node *restic.Node) bool {
matched, err := filepath.Match(pattern, node.Name)
if err != nil {
panic(err)
}
if !matched {
debug.Log("restic.restore", "item %v doesn't match pattern %q", item, pattern)
}
return matched
selectExcludeFilter := func(item string, dstpath string, node *restic.Node) bool {
matched, err := filter.List(cmd.Exclude, item)
if err != nil {
cmd.global.Warnf("error for exclude pattern: %v", err)
}
return !matched
}
cmd.global.Verbosef("restoring %s to %s\n", res.Snapshot(), target)
selectIncludeFilter := func(item string, dstpath string, node *restic.Node) bool {
matched, err := filter.List(cmd.Include, item)
if err != nil {
cmd.global.Warnf("error for include pattern: %v", err)
}
err = res.RestoreTo(target)
return matched
}
if len(cmd.Exclude) > 0 {
res.SelectFilter = selectExcludeFilter
} else if len(cmd.Include) > 0 {
res.SelectFilter = selectIncludeFilter
}
cmd.global.Verbosef("restoring %s to %s\n", res.Snapshot(), cmd.Target)
err = res.RestoreTo(cmd.Target)
if err != nil {
return err
}

View File

@ -10,12 +10,14 @@ import (
"os"
"path/filepath"
"regexp"
"strings"
"syscall"
"testing"
"time"
"github.com/restic/restic/backend"
"github.com/restic/restic/debug"
"github.com/restic/restic/filter"
. "github.com/restic/restic/test"
)
@ -44,7 +46,11 @@ func cmdInit(t testing.TB, global GlobalOptions) {
}
func cmdBackup(t testing.TB, global GlobalOptions, target []string, parentID backend.ID) {
cmd := &CmdBackup{global: &global}
cmdBackupExcludes(t, global, target, parentID, nil)
}
func cmdBackupExcludes(t testing.TB, global GlobalOptions, target []string, parentID backend.ID, excludes []string) {
cmd := &CmdBackup{global: &global, Exclude: excludes}
cmd.Parent = parentID.String()
t.Logf("backing up %v", target)
@ -63,9 +69,18 @@ func cmdList(t testing.TB, global GlobalOptions, tpe string) []backend.ID {
return IDs
}
func cmdRestore(t testing.TB, global GlobalOptions, dir string, snapshotID backend.ID, args ...string) {
cmd := &CmdRestore{global: &global}
cmd.Execute(append([]string{snapshotID.String(), dir}, args...))
func cmdRestore(t testing.TB, global GlobalOptions, dir string, snapshotID backend.ID) {
cmdRestoreExcludes(t, global, dir, snapshotID, nil)
}
func cmdRestoreExcludes(t testing.TB, global GlobalOptions, dir string, snapshotID backend.ID, excludes []string) {
cmd := &CmdRestore{global: &global, Target: dir, Exclude: excludes}
OK(t, cmd.Execute([]string{snapshotID.String()}))
}
func cmdRestoreIncludes(t testing.TB, global GlobalOptions, dir string, snapshotID backend.ID, includes []string) {
cmd := &CmdRestore{global: &global, Target: dir, Include: includes}
OK(t, cmd.Execute([]string{snapshotID.String()}))
}
func cmdCheck(t testing.TB, global GlobalOptions) {
@ -73,6 +88,16 @@ func cmdCheck(t testing.TB, global GlobalOptions) {
OK(t, cmd.Execute(nil))
}
func cmdLs(t testing.TB, global GlobalOptions, snapshotID string) []string {
var buf bytes.Buffer
global.stdout = &buf
cmd := &CmdLs{global: &global}
OK(t, cmd.Execute([]string{snapshotID}))
return strings.Split(string(buf.Bytes()), "\n")
}
func TestBackup(t *testing.T) {
withTestEnvironment(t, func(env *testEnvironment, global GlobalOptions) {
datafile := filepath.Join("testdata", "backup-data.tar.gz")
@ -237,6 +262,86 @@ func TestBackupMissingFile2(t *testing.T) {
})
}
func includes(haystack []string, needle string) bool {
for _, s := range haystack {
if s == needle {
return true
}
}
return false
}
func loadSnapshotMap(t testing.TB, global GlobalOptions) map[string]struct{} {
snapshotIDs := cmdList(t, global, "snapshots")
m := make(map[string]struct{})
for _, id := range snapshotIDs {
m[id.String()] = struct{}{}
}
return m
}
func lastSnapshot(old, new map[string]struct{}) (map[string]struct{}, string) {
for k := range new {
if _, ok := old[k]; !ok {
old[k] = struct{}{}
return old, k
}
}
return old, ""
}
var backupExcludeFilenames = []string{
"testfile1",
"foo.tar.gz",
"private/secret/passwords.txt",
"work/source/test.c",
}
func TestBackupExclude(t *testing.T) {
withTestEnvironment(t, func(env *testEnvironment, global GlobalOptions) {
cmdInit(t, global)
datadir := filepath.Join(env.base, "testdata")
for _, filename := range backupExcludeFilenames {
fp := filepath.Join(datadir, filename)
OK(t, os.MkdirAll(filepath.Dir(fp), 0755))
f, err := os.Create(fp)
OK(t, err)
fmt.Fprintf(f, filename)
OK(t, f.Close())
}
snapshots := make(map[string]struct{})
cmdBackup(t, global, []string{datadir}, nil)
snapshots, snapshotID := lastSnapshot(snapshots, loadSnapshotMap(t, global))
files := cmdLs(t, global, snapshotID)
Assert(t, includes(files, filepath.Join("testdata", "foo.tar.gz")),
"expected file %q in first snapshot, but it's not included", "foo.tar.gz")
cmdBackupExcludes(t, global, []string{datadir}, nil, []string{"*.tar.gz"})
snapshots, snapshotID = lastSnapshot(snapshots, loadSnapshotMap(t, global))
files = cmdLs(t, global, snapshotID)
Assert(t, !includes(files, filepath.Join("testdata", "foo.tar.gz")),
"expected file %q not in first snapshot, but it's included", "foo.tar.gz")
cmdBackupExcludes(t, global, []string{datadir}, nil, []string{"*.tar.gz", "private/secret"})
snapshots, snapshotID = lastSnapshot(snapshots, loadSnapshotMap(t, global))
files = cmdLs(t, global, snapshotID)
Assert(t, !includes(files, filepath.Join("testdata", "foo.tar.gz")),
"expected file %q not in first snapshot, but it's included", "foo.tar.gz")
Assert(t, !includes(files, filepath.Join("testdata", "private", "secret", "passwords.txt")),
"expected file %q not in first snapshot, but it's included", "passwords.txt")
})
}
const (
incrementalFirstWrite = 20 * 1042 * 1024
incrementalSecondWrite = 12 * 1042 * 1024
@ -422,10 +527,10 @@ func TestRestoreFilter(t *testing.T) {
for i, pat := range []string{"*.c", "*.exe", "*", "*file3*"} {
base := filepath.Join(env.base, fmt.Sprintf("restore%d", i+1))
cmdRestore(t, global, base, snapshotID, pat)
cmdRestoreExcludes(t, global, base, snapshotID, []string{pat})
for _, test := range testfiles {
err := testFileSize(filepath.Join(base, "testdata", test.name), int64(test.size))
if ok, _ := filepath.Match(pat, filepath.Base(test.name)); ok {
if ok, _ := filter.Match(pat, filepath.Base(test.name)); !ok {
OK(t, err)
} else {
Assert(t, os.IsNotExist(err),
@ -463,7 +568,7 @@ func TestRestoreNoMetadataOnIgnoredIntermediateDirs(t *testing.T) {
// restore with filter "*.ext", this should restore "file.ext", but
// since the directories are ignored and only created because of
// "file.ext", no meta data should be restored for them.
cmdRestore(t, global, filepath.Join(env.base, "restore0"), snapshotID, "*.ext")
cmdRestoreIncludes(t, global, filepath.Join(env.base, "restore0"), snapshotID, []string{"*.ext"})
f1 := filepath.Join(env.base, "restore0", "testdata", "subdir1", "subdir2")
fi, err := os.Stat(f1)
@ -473,7 +578,7 @@ func TestRestoreNoMetadataOnIgnoredIntermediateDirs(t *testing.T) {
"meta data of intermediate directory has been restore although it was ignored")
// restore with filter "*", this should restore meta data on everything.
cmdRestore(t, global, filepath.Join(env.base, "restore1"), snapshotID, "*")
cmdRestoreIncludes(t, global, filepath.Join(env.base, "restore1"), snapshotID, []string{"*"})
f2 := filepath.Join(env.base, "restore1", "testdata", "subdir1", "subdir2")
fi, err = os.Stat(f2)

5
filter/doc.go Normal file
View File

@ -0,0 +1,5 @@
// Package filter implements filters for files similar to filepath.Glob, but
// in contrast to filepath.Glob a pattern may specify directories.
//
// For a list of valid patterns please see the documentation on filepath.Glob.
package filter
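
The package comment is intentionally short; in practice a pattern is a sequence of filepath.Match patterns joined by the path separator, it may match any consecutive run of the path's components, and a leading separator anchors it at the root. A small usage sketch; the inputs are taken from the test table added in filter_test.go below:

package main

import (
	"fmt"

	"github.com/restic/restic/filter"
)

func main() {
	// Each pattern component uses filepath.Match syntax; the pattern may
	// match any consecutive run of path components.
	m, _ := filter.Match("bar/*.go", "/foo/bar/test.go")
	fmt.Println(m) // true

	// A pattern starting with a separator is anchored at the path root.
	m, _ = filter.Match("/bar*", "/foo/bar/test.go")
	fmt.Println(m) // false

	// List reports whether any of the given patterns matches.
	m, _ = filter.List([]string{"*.c", "*.go"}, "/home/user/file.go")
	fmt.Println(m) // true
}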

108
filter/filter.go Normal file
View File

@ -0,0 +1,108 @@
package filter
import (
"errors"
"path/filepath"
"strings"
)
// ErrBadString is returned when Match is called with the empty string as the
// second argument.
var ErrBadString = errors.New("filter.Match: string is empty")
// Match returns true if str matches the pattern. When the pattern is
// malformed, filepath.ErrBadPattern is returned. The empty pattern matches
// everything, when str is the empty string ErrBadString is returned.
//
// Pattern can be a combination of patterns suitable for filepath.Match, joined
// by filepath.Separator.
func Match(pattern, str string) (matched bool, err error) {
if pattern == "" {
return true, nil
}
if str == "" {
return false, ErrBadString
}
patterns := strings.Split(pattern, string(filepath.Separator))
strs := strings.Split(str, string(filepath.Separator))
return match(patterns, strs)
}
func hasDoubleWildcard(list []string) (ok bool, pos int) {
for i, item := range list {
if item == "**" {
return true, i
}
}
return false, 0
}
func match(patterns, strs []string) (matched bool, err error) {
if ok, pos := hasDoubleWildcard(patterns); ok {
// gradually expand '**' into separate wildcards
for i := 0; i <= len(strs)-len(patterns)+1; i++ {
newPat := make([]string, pos)
copy(newPat, patterns[:pos])
for k := 0; k < i; k++ {
newPat = append(newPat, "*")
}
newPat = append(newPat, patterns[pos+1:]...)
matched, err := match(newPat, strs)
if err != nil {
return false, err
}
if matched {
return true, nil
}
}
return false, nil
}
if len(patterns) == 0 && len(strs) == 0 {
return true, nil
}
if len(patterns) <= len(strs) {
outer:
for offset := len(strs) - len(patterns); offset >= 0; offset-- {
for i := len(patterns) - 1; i >= 0; i-- {
ok, err := filepath.Match(patterns[i], strs[offset+i])
if err != nil {
return false, err
}
if !ok {
continue outer
}
}
return true, nil
}
}
return false, nil
}
// List returns true if str matches one of the patterns.
func List(patterns []string, str string) (matched bool, err error) {
for _, pat := range patterns {
matched, err = Match(pat, str)
if err != nil {
return false, err
}
if matched {
return true, nil
}
}
return false, nil
}
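
The interesting part of match() is the double wildcard: a single "**" component is expanded step by step into zero, one, two, ... plain "*" components (bounded by the number of path components), and each candidate pattern is retried. The helper below only makes those candidates visible; it is an illustration of that loop, not part of the package's API, and maxStars stands in for the bound len(strs)-len(patterns)+1 used above:

package main

import (
	"fmt"
	"strings"
)

// candidates lists the patterns produced when the first "**" component is
// replaced by 0..maxStars plain "*" components (illustration only).
func candidates(pattern string, maxStars int) []string {
	parts := strings.Split(pattern, "/")

	pos := -1
	for i, p := range parts {
		if p == "**" {
			pos = i
			break
		}
	}
	if pos < 0 {
		return []string{pattern}
	}

	var out []string
	for n := 0; n <= maxStars; n++ {
		cand := append([]string{}, parts[:pos]...)
		for k := 0; k < n; k++ {
			cand = append(cand, "*")
		}
		cand = append(cand, parts[pos+1:]...)
		out = append(out, strings.Join(cand, "/"))
	}
	return out
}

func main() {
	for _, p := range candidates("foo/**/bar/*.go", 3) {
		fmt.Println(p)
	}
	// foo/bar/*.go
	// foo/*/bar/*.go
	// foo/*/*/bar/*.go
	// foo/*/*/*/bar/*.go
}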

252
filter/filter_test.go Normal file
View File

@ -0,0 +1,252 @@
package filter_test
import (
"bufio"
"compress/bzip2"
"fmt"
"os"
"testing"
"github.com/restic/restic/filter"
)
var matchTests = []struct {
pattern string
path string
match bool
}{
{"", "", true},
{"", "foo", true},
{"", "/x/y/z/foo", true},
{"*.go", "/foo/bar/test.go", true},
{"*.c", "/foo/bar/test.go", false},
{"*", "/foo/bar/test.go", true},
{"foo*", "/foo/bar/test.go", true},
{"bar*", "/foo/bar/test.go", true},
{"/bar*", "/foo/bar/test.go", false},
{"bar/*", "/foo/bar/test.go", true},
{"baz/*", "/foo/bar/test.go", false},
{"bar/test.go", "/foo/bar/test.go", true},
{"bar/*.go", "/foo/bar/test.go", true},
{"ba*/*.go", "/foo/bar/test.go", true},
{"bb*/*.go", "/foo/bar/test.go", false},
{"test.*", "/foo/bar/test.go", true},
{"tesT.*", "/foo/bar/test.go", false},
{"bar/*", "/foo/bar/baz", true},
{"bar", "/foo/bar", true},
{"bar", "/foo/bar/baz", true},
{"bar", "/foo/bar/test.go", true},
{"/foo/*test.*", "/foo/bar/test.go", false},
{"/foo/*/test.*", "/foo/bar/test.go", true},
{"/foo/*/bar/test.*", "/foo/bar/test.go", false},
{"/*/*/bar/test.*", "/foo/bar/test.go", false},
{"/*/*/bar/test.*", "/foo/bar/baz/test.go", false},
{"/*/*/baz/test.*", "/foo/bar/baz/test.go", true},
{"/*/foo/bar/test.*", "/foo/bar/baz/test.go", false},
{"/*/foo/bar/test.*", "/foo/bar/baz/test.go", false},
{"/foo/bar/test.*", "bar/baz/test.go", false},
{"/x/y/bar/baz/test.*", "bar/baz/test.go", false},
{"/x/y/bar/baz/test.c", "bar/baz/test.go", false},
{"baz/test.*", "bar/baz/test.go", true},
{"baz/tesT.*", "bar/baz/test.go", false},
{"test.go", "bar/baz/test.go", true},
{"*.go", "bar/baz/test.go", true},
{"*.c", "bar/baz/test.go", false},
{"sdk", "/foo/bar/sdk", true},
{"sdk", "/foo/bar/sdk/test/sdk_foo.go", true},
{
"sdk/*/cpp/*/*vars*.html",
"/usr/share/doc/libreoffice/sdk/docs/cpp/ref/a00517.html",
false,
},
{"foo/**/bar/*.go", "/home/user/foo/work/special/project/bar/test.go", true},
{"foo/**/bar/*.go", "/home/user/foo/bar/test.go", true},
{"foo/**/bar/*.go", "x/foo/bar/test.go", true},
{"foo/**/bar/*.go", "foo/bar/test.go", true},
{"foo/**/bar/*.go", "/home/user/foo/test.c", false},
{"foo/**/bar/*.go", "bar/foo/main.go", false},
{"foo/**/bar/*.go", "/foo/bar/main.go", true},
{"foo/**/bar/*.go", "bar/main.go", false},
{"foo/**/bar", "/home/user/foo/x/y/bar", true},
{"foo/**/bar", "/home/user/foo/x/y/bar/main.go", true},
{"user/**/important*", "/home/user/work/x/y/hidden/x", false},
{"user/**/hidden*/**/c", "/home/user/work/x/y/hidden/z/a/b/c", true},
}
func TestMatch(t *testing.T) {
for i, test := range matchTests {
match, err := filter.Match(test.pattern, test.path)
if err != nil {
t.Errorf("test %d failed: expected no error for pattern %q, but error returned: %v",
i, test.pattern, err)
continue
}
if match != test.match {
t.Errorf("test %d: filter.Match(%q, %q): expected %v, got %v",
i, test.pattern, test.path, test.match, match)
}
}
}
func ExampleMatch() {
match, _ := filter.Match("*.go", "/home/user/file.go")
fmt.Printf("match: %v\n", match)
// Output:
// match: true
}
func ExampleMatch_wildcards() {
match, _ := filter.Match("/home/[uU]ser/?.go", "/home/user/F.go")
fmt.Printf("match: %v\n", match)
// Output:
// match: true
}
var filterListTests = []struct {
patterns []string
path string
match bool
}{
{[]string{"*.go"}, "/foo/bar/test.go", true},
{[]string{"*.c"}, "/foo/bar/test.go", false},
{[]string{"*.go", "*.c"}, "/foo/bar/test.go", true},
{[]string{"*"}, "/foo/bar/test.go", true},
{[]string{"x"}, "/foo/bar/test.go", false},
{[]string{"?"}, "/foo/bar/test.go", false},
{[]string{"?", "x"}, "/foo/bar/x", true},
{[]string{"/*/*/bar/test.*"}, "/foo/bar/test.go", false},
{[]string{"/*/*/bar/test.*", "*.go"}, "/foo/bar/test.go", true},
}
func TestMatchList(t *testing.T) {
for i, test := range filterListTests {
match, err := filter.List(test.patterns, test.path)
if err != nil {
t.Errorf("test %d failed: expected no error for patterns %q, but error returned: %v",
i, test.patterns, err)
continue
}
if match != test.match {
t.Errorf("test %d: filter.MatchList(%q, %q): expected %v, got %v",
i, test.patterns, test.path, test.match, match)
}
}
}
func ExampleMatchList() {
match, _ := filter.List([]string{"*.c", "*.go"}, "/home/user/file.go")
fmt.Printf("match: %v\n", match)
// Output:
// match: true
}
func extractTestLines(t testing.TB) (lines []string) {
f, err := os.Open("testdata/libreoffice.txt.bz2")
if err != nil {
t.Fatal(err)
}
defer func() {
if err := f.Close(); err != nil {
t.Fatal(err)
}
}()
sc := bufio.NewScanner(bzip2.NewReader(f))
for sc.Scan() {
lines = append(lines, sc.Text())
}
return lines
}
func TestFilterPatternsFile(t *testing.T) {
lines := extractTestLines(t)
var testPatterns = []struct {
pattern string
hits uint
}{
{"*.html", 18249},
{"sdk", 22186},
{"sdk/*/cpp/*/*vars.html", 3},
}
for _, test := range testPatterns {
var c uint
for _, line := range lines {
match, err := filter.Match(test.pattern, line)
if err != nil {
t.Error(err)
continue
}
if match {
c++
// fmt.Printf("pattern %q, line %q\n", test.pattern, line)
}
}
if c != test.hits {
t.Errorf("wrong number of hits for pattern %q: want %d, got %d",
test.pattern, test.hits, c)
}
}
}
func BenchmarkFilterLines(b *testing.B) {
pattern := "sdk/*/cpp/*/*vars.html"
lines := extractTestLines(b)
var c uint
b.ResetTimer()
for i := 0; i < b.N; i++ {
c = 0
for _, line := range lines {
match, err := filter.Match(pattern, line)
if err != nil {
b.Fatal(err)
}
if match {
c++
}
}
if c != 3 {
b.Fatalf("wrong number of matches: expected 3, got %d", c)
}
}
}
func BenchmarkFilterPatterns(b *testing.B) {
patterns := []string{
"sdk/*",
"*.html",
}
lines := extractTestLines(b)
var c uint
b.ResetTimer()
for i := 0; i < b.N; i++ {
c = 0
for _, line := range lines {
match, err := filter.List(patterns, line)
if err != nil {
b.Fatal(err)
}
if match {
c++
}
}
if c != 22185 {
b.Fatalf("wrong number of matches: expected 22185, got %d", c)
}
}
}

BIN
filter/testdata/libreoffice.txt.bz2 vendored Normal file

Binary file not shown.

View File

@ -82,13 +82,22 @@ func isFile(fi os.FileInfo) bool {
var errCancelled = errors.New("walk cancelled")
func walk(basedir, dir string, done chan struct{}, jobs chan<- Job, res chan<- Result) error {
// SelectFunc returns true for all items that should be included (files and
// dirs). If false is returned, files are ignored and dirs are not even walked.
type SelectFunc func(item string, fi os.FileInfo) bool
func walk(basedir, dir string, selectFunc SelectFunc, done chan struct{}, jobs chan<- Job, res chan<- Result) error {
info, err := os.Lstat(dir)
if err != nil {
debug.Log("pipe.walk", "error for %v: %v", dir, err)
return err
}
if !selectFunc(dir, info) {
debug.Log("pipe.walk", "file %v excluded by filter", dir)
return nil
}
relpath, _ := filepath.Rel(basedir, dir)
if !info.IsDir() {
@ -114,13 +123,18 @@ func walk(basedir, dir string, done chan struct{}, jobs chan<- Job, res chan<- R
for _, name := range names {
subpath := filepath.Join(dir, name)
fi, statErr := os.Lstat(subpath)
if !selectFunc(subpath, fi) {
debug.Log("pipe.walk", "file %v excluded by filter", subpath)
continue
}
ch := make(chan Result, 1)
entries = append(entries, ch)
fi, err := os.Lstat(subpath)
if err != nil {
if statErr != nil {
select {
case jobs <- Entry{info: fi, error: err, basedir: basedir, path: filepath.Join(relpath, name), result: ch}:
case jobs <- Entry{info: fi, error: statErr, basedir: basedir, path: filepath.Join(relpath, name), result: ch}:
case <-done:
return errCancelled
}
@ -132,7 +146,7 @@ func walk(basedir, dir string, done chan struct{}, jobs chan<- Job, res chan<- R
debug.RunHook("pipe.walk2", filepath.Join(relpath, name))
if isDir(fi) {
err = walk(basedir, subpath, done, jobs, ch)
err = walk(basedir, subpath, selectFunc, done, jobs, ch)
if err != nil {
return err
}
@ -156,7 +170,7 @@ func walk(basedir, dir string, done chan struct{}, jobs chan<- Job, res chan<- R
// Walk sends a Job for each file and directory it finds below the paths. When
// the channel done is closed, processing stops.
func Walk(paths []string, done chan struct{}, jobs chan<- Job, res chan<- Result) error {
func Walk(paths []string, selectFunc SelectFunc, done chan struct{}, jobs chan<- Job, res chan<- Result) error {
defer func() {
debug.Log("pipe.Walk", "output channel closed")
close(jobs)
@ -166,7 +180,7 @@ func Walk(paths []string, done chan struct{}, jobs chan<- Job, res chan<- Result
for _, path := range paths {
debug.Log("pipe.Walk", "start walker for %v", path)
ch := make(chan Result, 1)
err := walk(filepath.Dir(path), path, done, jobs, ch)
err := walk(filepath.Dir(path), path, selectFunc, done, jobs, ch)
if err != nil {
debug.Log("pipe.Walk", "error for %v: %v", path, err)
continue
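
Note that walk() evaluates the SelectFunc before descending, so an excluded directory costs a single Lstat instead of a full subtree traversal. A minimal sketch of such a predicate; skipHidden is illustrative only (the pipe tests below simply pass an accept-all function as the second argument of pipe.Walk):

package main

import (
	"fmt"
	"os"
	"path/filepath"
	"strings"
)

// skipHidden has the shape of pipe.SelectFunc: returning false drops the
// item, and for directories the walker then never descends into them.
func skipHidden(item string, fi os.FileInfo) bool {
	return !strings.HasPrefix(filepath.Base(item), ".")
}

func main() {
	for _, p := range []string{"/home/user/work", "/home/user/.cache"} {
		fmt.Printf("%s -> include=%v\n", p, skipHidden(p, nil))
	}
	// /home/user/work -> include=true
	// /home/user/.cache -> include=false
}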

View File

@ -19,6 +19,10 @@ type stats struct {
dirs, files int
}
func acceptAll(string, os.FileInfo) bool {
return true
}
func statPath(path string) (stats, error) {
var s stats
@ -118,7 +122,7 @@ func TestPipelineWalkerWithSplit(t *testing.T) {
}()
resCh := make(chan pipe.Result, 1)
err = pipe.Walk([]string{TestWalkerPath}, done, jobs, resCh)
err = pipe.Walk([]string{TestWalkerPath}, acceptAll, done, jobs, resCh)
OK(t, err)
// wait for all workers to terminate
@ -198,7 +202,7 @@ func TestPipelineWalker(t *testing.T) {
}
resCh := make(chan pipe.Result, 1)
err = pipe.Walk([]string{TestWalkerPath}, done, jobs, resCh)
err = pipe.Walk([]string{TestWalkerPath}, acceptAll, done, jobs, resCh)
OK(t, err)
// wait for all workers to terminate
@ -298,7 +302,7 @@ func BenchmarkPipelineWalker(b *testing.B) {
}()
resCh := make(chan pipe.Result, 1)
err := pipe.Walk([]string{TestWalkerPath}, done, jobs, resCh)
err := pipe.Walk([]string{TestWalkerPath}, acceptAll, done, jobs, resCh)
OK(b, err)
// wait for all workers to terminate
@ -375,7 +379,7 @@ func TestPipelineWalkerMultiple(t *testing.T) {
}
resCh := make(chan pipe.Result, 1)
err = pipe.Walk(paths, done, jobs, resCh)
err = pipe.Walk(paths, acceptAll, done, jobs, resCh)
OK(t, err)
// wait for all workers to terminate

View File

@ -18,8 +18,8 @@ type Restorer struct {
repo *repository.Repository
sn *Snapshot
Error func(dir string, node *Node, err error) error
SelectForRestore func(item string, dstpath string, node *Node) bool
Error func(dir string, node *Node, err error) error
SelectFilter func(item string, dstpath string, node *Node) bool
}
var restorerAbortOnAllErrors = func(str string, node *Node, err error) error { return err }
@ -28,7 +28,7 @@ var restorerAbortOnAllErrors = func(str string, node *Node, err error) error { r
func NewRestorer(repo *repository.Repository, id backend.ID) (*Restorer, error) {
r := &Restorer{
repo: repo, Error: restorerAbortOnAllErrors,
SelectForRestore: func(string, string, *Node) bool { return true },
SelectFilter: func(string, string, *Node) bool { return true },
}
var err error
@ -48,7 +48,7 @@ func (res *Restorer) restoreTo(dst string, dir string, treeID backend.ID) error
}
for _, node := range tree.Nodes {
selectedForRestore := res.SelectForRestore(filepath.Join(dir, node.Name),
selectedForRestore := res.SelectFilter(filepath.Join(dir, node.Name),
filepath.Join(dst, dir, node.Name), node)
debug.Log("Restorer.restoreNodeTo", "SelectForRestore returned %v", selectedForRestore)

View File

@ -1,6 +1,7 @@
package restic_test
import (
"os"
"path/filepath"
"testing"
@ -33,7 +34,11 @@ func TestWalkTree(t *testing.T) {
// start filesystem walker
fsJobs := make(chan pipe.Job)
resCh := make(chan pipe.Result, 1)
go pipe.Walk(dirs, done, fsJobs, resCh)
f := func(string, os.FileInfo) bool {
return true
}
go pipe.Walk(dirs, f, done, fsJobs, resCh)
for {
// receive fs job