// ErrBadString is returned when Match is called with the empty string as the
// second argument.
var ErrBadString = errors.New("filter.Match: string is empty")

// splitPattern breaks pattern into its path components. The empty pattern
// yields no components at all, which match treats as "matches everything";
// this keeps Match and New in agreement about the empty pattern.
func splitPattern(pattern string) []string {
	if pattern == "" {
		return nil
	}

	return strings.Split(pattern, string(filepath.Separator))
}

// Match returns true if str matches the pattern. When the pattern is
// malformed, filepath.ErrBadPattern is returned. The empty pattern matches
// everything (including the empty string); for any other pattern an empty str
// yields ErrBadString.
//
// Pattern can be a combination of patterns suitable for filepath.Match, joined
// by filepath.Separator. The pattern may match any run of consecutive path
// components of str, so "bar" matches "/foo/bar/baz". A pattern starting with
// filepath.Separator only matches at the start of a str that also starts with
// the separator.
func Match(pattern, str string) (matched bool, err error) {
	if pattern == "" {
		return true, nil
	}

	if str == "" {
		return false, ErrBadString
	}

	return match(splitPattern(pattern), strings.Split(str, string(filepath.Separator)))
}

// match reports whether the pattern components match any window of
// consecutive components in strs. Windows are tried from the rightmost
// position to the leftmost. An empty patterns slice matches everything.
func match(patterns, strs []string) (matched bool, err error) {
	for offset := len(strs) - len(patterns); offset >= 0; offset-- {
		ok, err := matchWindow(patterns, strs[offset:])
		if err != nil {
			return false, err
		}

		if ok {
			return true, nil
		}
	}

	// Nothing matched. The search stops comparing a window at its first
	// mismatch (and does not run at all when the pattern has more components
	// than the path), so some components may never have been looked at.
	// Check their syntax now so that a malformed pattern is always reported.
	return false, checkPatterns(patterns)
}

// matchWindow compares patterns[i] with window[i] for every pattern
// component, last component first. window must hold at least len(patterns)
// elements.
func matchWindow(patterns, window []string) (bool, error) {
	for i := len(patterns) - 1; i >= 0; i-- {
		ok, err := filepath.Match(patterns[i], window[i])
		if err != nil || !ok {
			return false, err
		}
	}

	return true, nil
}

// checkPatterns returns the first syntax error filepath.Match reports for one
// of the pattern components, or nil.
func checkPatterns(patterns []string) error {
	for _, p := range patterns {
		if _, err := filepath.Match(p, ""); err != nil {
			return err
		}
	}

	return nil
}

// MatchList returns true if str matches one of the patterns. The patterns are
// tried in order; the first error stops the search and is returned.
func MatchList(patterns []string, str string) (matched bool, err error) {
	for _, pat := range patterns {
		matched, err = Match(pat, str)
		if err != nil {
			return false, err
		}

		if matched {
			return true, nil
		}
	}

	return false, nil
}

// matchList returns true if str matches one of the patterns. It is the
// counterpart of MatchList for patterns and names that are already split into
// path components. An empty list never matches.
func matchList(patterns [][]string, str []string) (matched bool, err error) {
	for _, pat := range patterns {
		matched, err = match(pat, str)
		if err != nil {
			return false, err
		}

		if matched {
			return true, nil
		}
	}

	return false, nil
}

// Filter contains include and exclude patterns. If both lists of patterns are
// empty, all files are accepted.
type Filter struct {
	// Each pattern is stored split into its path components.
	include, exclude [][]string
}

// New returns a new filter with the given include/exclude lists of patterns.
// As with Match, an empty pattern matches every name.
func New(include, exclude []string) *Filter {
	f := &Filter{}

	for _, pat := range include {
		f.include = append(f.include, splitPattern(pat))
	}

	for _, pat := range exclude {
		f.exclude = append(f.exclude, splitPattern(pat))
	}

	return f
}

// Match tests a filename against the filter. If include and exclude patterns
// are both empty, true is returned. When name is the empty string,
// ErrBadString is returned.
//
// If only include patterns and no exclude patterns are configured, true is
// returned iff name matches one of the include patterns.
//
// If only exclude patterns and no include patterns are configured, true is
// returned iff name matches none of the exclude patterns.
//
// If both are configured, true is returned iff name matches none of the
// exclude patterns, or matches one of the include patterns.
+func (f Filter) Match(name string) (matched bool, err error) { + if name == "" { + return false, ErrBadString + } + + if len(f.include) == 0 && len(f.exclude) == 0 { + return true, nil + } + + names := strings.Split(name, string(filepath.Separator)) + if len(f.exclude) == 0 { + return matchList(f.include, names) + } + + if len(f.include) == 0 { + match, err := matchList(f.exclude, names) + return !match, err + } + + excluded, err := matchList(f.exclude, names) + if err != nil { + return false, err + } + + if !excluded { + return true, nil + } + + return matchList(f.include, names) +} diff --git a/filter/filter_test.go b/filter/filter_test.go new file mode 100644 index 000000000..cd21ec054 --- /dev/null +++ b/filter/filter_test.go @@ -0,0 +1,325 @@ +package filter_test + +import ( + "bufio" + "compress/bzip2" + "fmt" + "os" + "testing" + + "github.com/restic/restic/filter" +) + +var matchTests = []struct { + pattern string + path string + match bool +}{ + {"", "", true}, + {"", "foo", true}, + {"", "/x/y/z/foo", true}, + {"*.go", "/foo/bar/test.go", true}, + {"*.c", "/foo/bar/test.go", false}, + {"*", "/foo/bar/test.go", true}, + {"foo*", "/foo/bar/test.go", true}, + {"bar*", "/foo/bar/test.go", true}, + {"/bar*", "/foo/bar/test.go", false}, + {"bar/*", "/foo/bar/test.go", true}, + {"baz/*", "/foo/bar/test.go", false}, + {"bar/test.go", "/foo/bar/test.go", true}, + {"bar/*.go", "/foo/bar/test.go", true}, + {"ba*/*.go", "/foo/bar/test.go", true}, + {"bb*/*.go", "/foo/bar/test.go", false}, + {"test.*", "/foo/bar/test.go", true}, + {"tesT.*", "/foo/bar/test.go", false}, + {"bar/*", "/foo/bar/baz", true}, + {"bar", "/foo/bar", true}, + {"bar", "/foo/bar/baz", true}, + {"bar", "/foo/bar/test.go", true}, + {"/foo/*test.*", "/foo/bar/test.go", false}, + {"/foo/*/test.*", "/foo/bar/test.go", true}, + {"/foo/*/bar/test.*", "/foo/bar/test.go", false}, + {"/*/*/bar/test.*", "/foo/bar/test.go", false}, + {"/*/*/bar/test.*", "/foo/bar/baz/test.go", false}, + {"/*/*/baz/test.*", 
"/foo/bar/baz/test.go", true}, + {"/*/foo/bar/test.*", "/foo/bar/baz/test.go", false}, + {"/*/foo/bar/test.*", "/foo/bar/baz/test.go", false}, + {"/foo/bar/test.*", "bar/baz/test.go", false}, + {"/x/y/bar/baz/test.*", "bar/baz/test.go", false}, + {"/x/y/bar/baz/test.c", "bar/baz/test.go", false}, + {"baz/test.*", "bar/baz/test.go", true}, + {"baz/tesT.*", "bar/baz/test.go", false}, + {"test.go", "bar/baz/test.go", true}, + {"*.go", "bar/baz/test.go", true}, + {"*.c", "bar/baz/test.go", false}, + {"sdk", "/foo/bar/sdk", true}, + {"sdk", "/foo/bar/sdk/test/sdk_foo.go", true}, + {"sdk/*/cpp/*/*vars*.html", "/usr/share/doc/libreoffice/sdk/docs/cpp/ref/a00517.html", false}, +} + +func TestMatch(t *testing.T) { + for i, test := range matchTests { + match, err := filter.Match(test.pattern, test.path) + if err != nil { + t.Errorf("test %d failed: expected no error for pattern %q, but error returned: %v", + i, test.pattern, err) + continue + } + + if match != test.match { + t.Errorf("test %d: filter.Match(%q, %q): expected %v, got %v", + i, test.pattern, test.path, test.match, match) + } + } +} + +func ExampleMatch() { + match, _ := filter.Match("*.go", "/home/user/file.go") + fmt.Printf("match: %v\n", match) + // Output: + // match: true +} + +func ExampleMatch_wildcards() { + match, _ := filter.Match("/home/[uU]ser/?.go", "/home/user/F.go") + fmt.Printf("match: %v\n", match) + // Output: + // match: true +} + +var filterListTests = []struct { + patterns []string + path string + match bool +}{ + {[]string{"*.go"}, "/foo/bar/test.go", true}, + {[]string{"*.c"}, "/foo/bar/test.go", false}, + {[]string{"*.go", "*.c"}, "/foo/bar/test.go", true}, + {[]string{"*"}, "/foo/bar/test.go", true}, + {[]string{"x"}, "/foo/bar/test.go", false}, + {[]string{"?"}, "/foo/bar/test.go", false}, + {[]string{"?", "x"}, "/foo/bar/x", true}, + {[]string{"/*/*/bar/test.*"}, "/foo/bar/test.go", false}, + {[]string{"/*/*/bar/test.*", "*.go"}, "/foo/bar/test.go", true}, +} + +func TestMatchList(t 
*testing.T) { + for i, test := range filterListTests { + match, err := filter.MatchList(test.patterns, test.path) + if err != nil { + t.Errorf("test %d failed: expected no error for patterns %q, but error returned: %v", + i, test.patterns, err) + continue + } + + if match != test.match { + t.Errorf("test %d: filter.MatchList(%q, %q): expected %v, got %v", + i, test.patterns, test.path, test.match, match) + } + } +} + +func ExampleMatchList() { + match, _ := filter.MatchList([]string{"*.c", "*.go"}, "/home/user/file.go") + fmt.Printf("match: %v\n", match) + // Output: + // match: true +} + +func extractTestLines(t testing.TB) (lines []string) { + f, err := os.Open("testdata/libreoffice.txt.bz2") + if err != nil { + t.Fatal(err) + } + + defer func() { + if err := f.Close(); err != nil { + t.Fatal(err) + } + }() + + sc := bufio.NewScanner(bzip2.NewReader(f)) + for sc.Scan() { + lines = append(lines, sc.Text()) + } + + return lines +} + +func TestFilterPatternsFile(t *testing.T) { + lines := extractTestLines(t) + + var testPatterns = []struct { + pattern string + hits uint + }{ + {"*.html", 18249}, + {"sdk", 22186}, + {"sdk/*/cpp/*/*vars.html", 3}, + } + + for _, test := range testPatterns { + var c uint + for _, line := range lines { + match, err := filter.Match(test.pattern, line) + if err != nil { + t.Error(err) + continue + } + + if match { + c++ + // fmt.Printf("pattern %q, line %q\n", test.pattern, line) + } + } + + if c != test.hits { + t.Errorf("wrong number of hits for pattern %q: want %d, got %d", + test.pattern, test.hits, c) + } + } +} + +func BenchmarkFilterLines(b *testing.B) { + pattern := "sdk/*/cpp/*/*vars.html" + lines := extractTestLines(b) + var c uint + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + c = 0 + for _, line := range lines { + match, err := filter.Match(pattern, line) + if err != nil { + b.Fatal(err) + } + + if match { + c++ + } + } + + if c != 3 { + b.Fatalf("wrong number of matches: expected 3, got %d", c) + } + } +} + +func 
BenchmarkFilterSingle(b *testing.B) { + pattern := "sdk/*/cpp/*/*vars.html" + line := "/usr/share/doc/libreoffice/sdk/docs/cpp/ref/a00517.html" + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + filter.Match(pattern, line) + } +} + +type test struct { + path string + match bool +} + +var filterTests = []struct { + include, exclude []string + tests []test +}{ + { + []string{"*.go", "/home/user"}, + []string{}, + []test{ + {"/home/user/foo/test.c", true}, + {"/home/user/foo/test.go", true}, + {"/home/foo/test.go", true}, + {"/home/foo/test.doc", false}, + {"/x", false}, + {"main.go", true}, + }, + }, + { + nil, + []string{"*.docx", "*.xlsx"}, + []test{ + {"/home/user/foo/test.c", true}, + {"/home/user/foo/test.docx", false}, + {"/home/foo/test.xlsx", false}, + {"/home/foo/test.doc", true}, + {"/x", true}, + {"main.go", true}, + }, + }, + { + []string{"accounting.*", "*Partner*"}, + []string{"*.docx", "*.xlsx"}, + []test{ + // {"/home/user/foo/test.c", true}, + {"/home/user/Partner/test.docx", true}, + {"/home/user/bar/test.docx", false}, + {"/home/user/test.xlsx", false}, + {"/home/foo/test.doc", true}, + {"/x", true}, + {"main.go", true}, + {"/users/A/accounting.xlsx", true}, + {"/users/A/Calculation Partner.xlsx", true}, + }, + }, +} + +func TestFilter(t *testing.T) { + for i, test := range filterTests { + f := filter.New(test.include, test.exclude) + + for _, testfile := range test.tests { + matched, err := f.Match(testfile.path) + if err != nil { + t.Error(err) + } + + if matched != testfile.match { + t.Errorf("test %d: filter.Match(%q): expected %v, got %v", + i, testfile.path, testfile.match, matched) + } + } + } +} + +func BenchmarkFilter(b *testing.B) { + lines := extractTestLines(b) + f := filter.New([]string{"sdk", "*.html"}, []string{"*.png"}) + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + for _, line := range lines { + f.Match(line) + } + } +} + +func BenchmarkFilterInclude(b *testing.B) { + lines := extractTestLines(b) + f := 
filter.New([]string{"sdk", "*.html"}, nil) + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + for _, line := range lines { + f.Match(line) + } + } +} + +func BenchmarkFilterExclude(b *testing.B) { + lines := extractTestLines(b) + f := filter.New(nil, []string{"*.png"}) + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + for _, line := range lines { + f.Match(line) + } + } +} diff --git a/filter/testdata/libreoffice.txt.bz2 b/filter/testdata/libreoffice.txt.bz2 new file mode 100644 index 000000000..adc90f2e2 Binary files /dev/null and b/filter/testdata/libreoffice.txt.bz2 differ