Merge pull request #1912 from askielboe/select-funcs

Support for different kinds of select functions
This commit is contained in:
Alexander Neumann 2018-08-12 19:26:36 +02:00
commit 6b9dde3ce8
6 changed files with 110 additions and 56 deletions

View File

@ -0,0 +1,14 @@
Enhancement: Reject files/dirs by name first
The current scanner/archiver code had an architectural limitation: it always
ran the `lstat()` system call on all files and directories before a decision to
include/exclude the file/dir was made. This led to a lot of unnecessary system
calls for items that could have been rejected by their name or path only.
We've changed the archiver/scanner implementation so that it now first rejects
by name/path, and only runs the system call on the remaining items. This
reduces the number of `lstat()` system calls a lot (depending on the exclude
settings).
https://github.com/restic/restic/issues/1909
https://github.com/restic/restic/pull/1912

View File

@ -186,18 +186,9 @@ func (opts BackupOptions) Check(gopts GlobalOptions, args []string) error {
return nil return nil
} }
// collectRejectFuncs returns a list of all functions which may reject data // collectRejectByNameFuncs returns a list of all functions which may reject data
// from being saved in a snapshot // from being saved in a snapshot based on path only
func collectRejectFuncs(opts BackupOptions, repo *repository.Repository, targets []string) (fs []RejectFunc, err error) { func collectRejectByNameFuncs(opts BackupOptions, repo *repository.Repository, targets []string) (fs []RejectByNameFunc, err error) {
// allowed devices
if opts.ExcludeOtherFS && !opts.Stdin {
f, err := rejectByDevice(targets)
if err != nil {
return nil, err
}
fs = append(fs, f)
}
// exclude restic cache // exclude restic cache
if repo.Cache != nil { if repo.Cache != nil {
f, err := rejectResticCache(repo) f, err := rejectResticCache(repo)
@ -237,6 +228,21 @@ func collectRejectFuncs(opts BackupOptions, repo *repository.Repository, targets
return fs, nil return fs, nil
} }
// collectRejectFuncs assembles the reject functions that decide based on both
// the path and the os.FileInfo of an item. The device-based reject function
// is only set up when opts.ExcludeOtherFS is set and the backup does not read
// from stdin; otherwise the returned list is empty. An error from
// rejectByDevice is passed on to the caller together with a nil list.
func collectRejectFuncs(opts BackupOptions, repo *repository.Repository, targets []string) (fs []RejectFunc, err error) {
	// nothing to collect unless the device restriction applies
	if !opts.ExcludeOtherFS || opts.Stdin {
		return fs, nil
	}

	// restrict the backup to the devices of the given targets
	byDevice, err := rejectByDevice(targets)
	if err != nil {
		return nil, err
	}

	return append(fs, byDevice), nil
}
// readExcludePatternsFromFiles reads all exclude files and returns the list of // readExcludePatternsFromFiles reads all exclude files and returns the list of
// exclude patterns. For each line, leading and trailing white space is removed // exclude patterns. For each line, leading and trailing white space is removed
// and comment lines are ignored. For each remaining pattern, environment // and comment lines are ignored. For each remaining pattern, environment
@ -393,7 +399,13 @@ func runBackup(opts BackupOptions, gopts GlobalOptions, term *termstatus.Termina
return err return err
} }
// rejectFuncs collect functions that can reject items from the backup // rejectByNameFuncs collect functions that can reject items from the backup based on path only
rejectByNameFuncs, err := collectRejectByNameFuncs(opts, repo, targets)
if err != nil {
return err
}
// rejectFuncs collect functions that can reject items from the backup based on path and file info
rejectFuncs, err := collectRejectFuncs(opts, repo, targets) rejectFuncs, err := collectRejectFuncs(opts, repo, targets)
if err != nil { if err != nil {
return err return err
@ -414,6 +426,15 @@ func runBackup(opts BackupOptions, gopts GlobalOptions, term *termstatus.Termina
p.V("using parent snapshot %v\n", parentSnapshotID.Str()) p.V("using parent snapshot %v\n", parentSnapshotID.Str())
} }
selectByNameFilter := func(item string) bool {
for _, reject := range rejectByNameFuncs {
if reject(item) {
return false
}
}
return true
}
selectFilter := func(item string, fi os.FileInfo) bool { selectFilter := func(item string, fi os.FileInfo) bool {
for _, reject := range rejectFuncs { for _, reject := range rejectFuncs {
if reject(item, fi) { if reject(item, fi) {
@ -436,6 +457,7 @@ func runBackup(opts BackupOptions, gopts GlobalOptions, term *termstatus.Termina
} }
sc := archiver.NewScanner(targetFS) sc := archiver.NewScanner(targetFS)
sc.SelectByName = selectByNameFilter
sc.Select = selectFilter sc.Select = selectFilter
sc.Error = p.ScannerError sc.Error = p.ScannerError
sc.Result = p.ReportTotal sc.Result = p.ReportTotal
@ -444,6 +466,7 @@ func runBackup(opts BackupOptions, gopts GlobalOptions, term *termstatus.Termina
t.Go(func() error { return sc.Scan(t.Context(gopts.ctx), targets) }) t.Go(func() error { return sc.Scan(t.Context(gopts.ctx), targets) })
arch := archiver.New(repo, targetFS, archiver.Options{}) arch := archiver.New(repo, targetFS, archiver.Options{})
arch.SelectByName = selectByNameFilter
arch.Select = selectFilter arch.Select = selectFilter
arch.WithAtime = opts.WithAtime arch.WithAtime = opts.WithAtime
arch.Error = p.Error arch.Error = p.Error

View File

@ -60,15 +60,20 @@ func (rc *rejectionCache) Store(dir string, rejected bool) {
rc.m[dir] = rejected rc.m[dir] = rejected
} }
// RejectByNameFunc is a function that takes only the path of an item that
// would be included in the backup; unlike RejectFunc it does not receive an
// os.FileInfo. The function returns true if the item should be excluded
// (rejected) from the backup.
type RejectByNameFunc func(path string) bool
// RejectFunc is a function that takes a filename and os.FileInfo of a // RejectFunc is a function that takes a filename and os.FileInfo of a
// file that would be included in the backup. The function returns true if it // file that would be included in the backup. The function returns true if it
// should be excluded (rejected) from the backup. // should be excluded (rejected) from the backup.
type RejectFunc func(path string, fi os.FileInfo) bool type RejectFunc func(path string, fi os.FileInfo) bool
// rejectByPattern returns a RejectFunc which rejects files that match // rejectByPattern returns a RejectByNameFunc which rejects files that match
// one of the patterns. // one of the patterns.
func rejectByPattern(patterns []string) RejectFunc { func rejectByPattern(patterns []string) RejectByNameFunc {
return func(item string, fi os.FileInfo) bool { return func(item string) bool {
matched, _, err := filter.List(patterns, item) matched, _, err := filter.List(patterns, item)
if err != nil { if err != nil {
Warnf("error for exclude pattern: %v", err) Warnf("error for exclude pattern: %v", err)
@ -83,14 +88,14 @@ func rejectByPattern(patterns []string) RejectFunc {
} }
} }
// rejectIfPresent returns a RejectFunc which itself returns whether a path // rejectIfPresent returns a RejectByNameFunc which itself returns whether a path
// should be excluded. The RejectFunc considers a file to be excluded when // should be excluded. The RejectByNameFunc considers a file to be excluded when
// it resides in a directory with an exclusion file, that is specified by // it resides in a directory with an exclusion file, that is specified by
// excludeFileSpec in the form "filename[:content]". The returned error is // excludeFileSpec in the form "filename[:content]". The returned error is
// non-nil if the filename component of excludeFileSpec is empty. If rc is // non-nil if the filename component of excludeFileSpec is empty. If rc is
// non-nil, it is going to be used in the RejectFunc to expedite the evaluation // non-nil, it is going to be used in the RejectByNameFunc to expedite the evaluation
// of a directory based on previous visits. // of a directory based on previous visits.
func rejectIfPresent(excludeFileSpec string) (RejectFunc, error) { func rejectIfPresent(excludeFileSpec string) (RejectByNameFunc, error) {
if excludeFileSpec == "" { if excludeFileSpec == "" {
return nil, errors.New("name for exclusion tagfile is empty") return nil, errors.New("name for exclusion tagfile is empty")
} }
@ -107,7 +112,7 @@ func rejectIfPresent(excludeFileSpec string) (RejectFunc, error) {
} }
debug.Log("using %q as exclusion tagfile", tf) debug.Log("using %q as exclusion tagfile", tf)
rc := &rejectionCache{} rc := &rejectionCache{}
fn := func(filename string, _ os.FileInfo) bool { fn := func(filename string) bool {
return isExcludedByFile(filename, tf, tc, rc) return isExcludedByFile(filename, tf, tc, rc)
} }
return fn, nil return fn, nil
@ -252,11 +257,11 @@ func rejectByDevice(samples []string) (RejectFunc, error) {
}, nil }, nil
} }
// rejectResticCache returns a RejectFunc that rejects the restic cache // rejectResticCache returns a RejectByNameFunc that rejects the restic cache
// directory (if set). // directory (if set).
func rejectResticCache(repo *repository.Repository) (RejectFunc, error) { func rejectResticCache(repo *repository.Repository) (RejectByNameFunc, error) {
if repo.Cache == nil { if repo.Cache == nil {
return func(string, os.FileInfo) bool { return func(string) bool {
return false return false
}, nil }, nil
} }
@ -266,7 +271,7 @@ func rejectResticCache(repo *repository.Repository) (RejectFunc, error) {
return nil, errors.New("cacheBase is empty string") return nil, errors.New("cacheBase is empty string")
} }
return func(item string, _ os.FileInfo) bool { return func(item string) bool {
if fs.HasPathPrefix(cacheBase, item) { if fs.HasPathPrefix(cacheBase, item) {
debug.Log("rejecting restic cache directory %v", item) debug.Log("rejecting restic cache directory %v", item)
return true return true

View File

@ -27,7 +27,7 @@ func TestRejectByPattern(t *testing.T) {
for _, tc := range tests { for _, tc := range tests {
t.Run("", func(t *testing.T) { t.Run("", func(t *testing.T) {
reject := rejectByPattern(patterns) reject := rejectByPattern(patterns)
res := reject(tc.filename, nil) res := reject(tc.filename)
if res != tc.reject { if res != tc.reject {
t.Fatalf("wrong result for filename %v: want %v, got %v", t.Fatalf("wrong result for filename %v: want %v, got %v",
tc.filename, tc.reject, res) tc.filename, tc.reject, res)
@ -140,8 +140,8 @@ func TestMultipleIsExcludedByFile(t *testing.T) {
if err != nil { if err != nil {
return err return err
} }
excludedByFoo := fooExclude(p, fi) excludedByFoo := fooExclude(p)
excludedByBar := barExclude(p, fi) excludedByBar := barExclude(p)
excluded := excludedByFoo || excludedByBar excluded := excludedByFoo || excludedByBar
// the log message helps debugging in case the test fails // the log message helps debugging in case the test fails
t.Logf("%q: %v || %v = %v", p, excludedByFoo, excludedByBar, excluded) t.Logf("%q: %v || %v = %v", p, excludedByFoo, excludedByBar, excluded)

View File

@ -16,6 +16,10 @@ import (
tomb "gopkg.in/tomb.v2" tomb "gopkg.in/tomb.v2"
) )
// SelectByNameFunc returns true for all items that should be included (files and
// dirs). If false is returned, files are ignored and dirs are not even walked.
// The decision is made from the item's path alone; no os.FileInfo is passed in.
type SelectByNameFunc func(item string) bool
// SelectFunc returns true for all items that should be included (files and // SelectFunc returns true for all items that should be included (files and
// dirs). If false is returned, files are ignored and dirs are not even walked. // dirs). If false is returned, files are ignored and dirs are not even walked.
type SelectFunc func(item string, fi os.FileInfo) bool type SelectFunc func(item string, fi os.FileInfo) bool
@ -44,6 +48,7 @@ func (s *ItemStats) Add(other ItemStats) {
// Archiver saves a directory structure to the repo. // Archiver saves a directory structure to the repo.
type Archiver struct { type Archiver struct {
Repo restic.Repository Repo restic.Repository
SelectByName SelectByNameFunc
Select SelectFunc Select SelectFunc
FS fs.FS FS fs.FS
Options Options Options Options
@ -120,7 +125,8 @@ func (o Options) ApplyDefaults() Options {
func New(repo restic.Repository, fs fs.FS, opts Options) *Archiver { func New(repo restic.Repository, fs fs.FS, opts Options) *Archiver {
arch := &Archiver{ arch := &Archiver{
Repo: repo, Repo: repo,
Select: func(string, os.FileInfo) bool { return true }, SelectByName: func(item string) bool { return true },
Select: func(item string, fi os.FileInfo) bool { return true },
FS: fs, FS: fs,
Options: opts.ApplyDefaults(), Options: opts.ApplyDefaults(),
@ -297,7 +303,7 @@ func (fn *FutureNode) wait(ctx context.Context) {
// excluded, this function returns a nil node and error, with excluded set to // excluded, this function returns a nil node and error, with excluded set to
// true. // true.
// //
// Errors and completion is needs to be handled by the caller. // Errors and completion needs to be handled by the caller.
// //
// snPath is the path within the current snapshot. // snPath is the path within the current snapshot.
func (arch *Archiver) Save(ctx context.Context, snPath, target string, previous *restic.Node) (fn FutureNode, excluded bool, err error) { func (arch *Archiver) Save(ctx context.Context, snPath, target string, previous *restic.Node) (fn FutureNode, excluded bool, err error) {
@ -316,6 +322,13 @@ func (arch *Archiver) Save(ctx context.Context, snPath, target string, previous
fn.absTarget = abstarget fn.absTarget = abstarget
// exclude files by path before running Lstat to reduce number of lstat calls
if !arch.SelectByName(abstarget) {
debug.Log("%v is excluded by path", target)
return FutureNode{}, true, nil
}
// get file info and run remaining select functions that require file information
fi, err := arch.FS.Lstat(target) fi, err := arch.FS.Lstat(target)
if !arch.Select(abstarget, fi) { if !arch.Select(abstarget, fi) {
debug.Log("%v is excluded", target) debug.Log("%v is excluded", target)

View File

@ -13,6 +13,7 @@ import (
// items should be included. Error is called when an error occurs. // items should be included. Error is called when an error occurs.
type Scanner struct { type Scanner struct {
FS fs.FS FS fs.FS
SelectByName SelectByNameFunc
Select SelectFunc Select SelectFunc
Error ErrorFunc Error ErrorFunc
Result func(item string, s ScanStats) Result func(item string, s ScanStats)
@ -22,12 +23,9 @@ type Scanner struct {
func NewScanner(fs fs.FS) *Scanner { func NewScanner(fs fs.FS) *Scanner {
return &Scanner{ return &Scanner{
FS: fs, FS: fs,
Select: func(item string, fi os.FileInfo) bool { SelectByName: func(item string) bool { return true },
return true Select: func(item string, fi os.FileInfo) bool { return true },
}, Error: func(item string, fi os.FileInfo, err error) error { return err },
Error: func(item string, fi os.FileInfo, err error) error {
return err
},
Result: func(item string, s ScanStats) {}, Result: func(item string, s ScanStats) {},
} }
} }
@ -70,17 +68,18 @@ func (s *Scanner) scan(ctx context.Context, stats ScanStats, target string) (Sca
return stats, ctx.Err() return stats, ctx.Err()
} }
fi, err := s.FS.Lstat(target) // exclude files by path before running stat to reduce number of lstat calls
if err != nil { if !s.SelectByName(target) {
// ignore error if the target is to be excluded anyway
if !s.Select(target, nil) {
return stats, nil return stats, nil
} }
// else return filtered error // get file information
fi, err := s.FS.Lstat(target)
if err != nil {
return stats, s.Error(target, fi, err) return stats, s.Error(target, fi, err)
} }
// run remaining select functions that require file information
if !s.Select(target, fi) { if !s.Select(target, fi) {
return stats, nil return stats, nil
} }