mirror of
https://github.com/restic/restic.git
synced 2024-12-21 23:33:03 +00:00
Issue 4278:
Enhancement: include filters in command rewrite
This commit is contained in:
parent
6808004ad1
commit
84d81e720f
4 changed files with 267 additions and 11 deletions
31
changelog/unreleased/issue-4278
Normal file
31
changelog/unreleased/issue-4278
Normal file
|
@ -0,0 +1,31 @@
|
|||
Enhancement: support include filters in the rewrite command
|
||||
|
||||
The enhancement allows the standard include filter options to be used with the rewrite command:
|
||||
--iinclude pattern same as --include pattern but ignores the casing of filenames
|
||||
--iinclude-file file same as --include-file but ignores casing of filenames in patterns
|
||||
-i, --include pattern include a pattern (can be specified multiple times)
|
||||
--include-file file read include patterns from a file (can be specified multiple times)
|
||||
|
||||
In addition, the following option is available:
|
||||
-s, --snapshot-summary create snapshot summary record if it does not exist
|
||||
|
||||
To improve space performance of the newly created snapshot via the include filter variants,
|
||||
it is recommended to utilize the option
|
||||
-X, --exclude-empty exclude empty directories from being created, needs a second walk through the tree
|
||||
|
||||
Include and exclude filter options are mutually exclusive, as in other commands
|
||||
which use both include and exclude filters.
|
||||
|
||||
In order to make the include filter work efficiently, an additional read pass through the
|
||||
directory tree is needed to identify the subdirectories and their parents for the
|
||||
inclusion of files. Otherwise the full directory tree needs to be included
|
||||
which as a consequence may contain quite a lot of empty subdirectories. The additional read pass
|
||||
avoids this issue, but it might take a bit more time, depending on the network speed of
|
||||
the backend storage and the size of the snapshot.
|
||||
|
||||
The --snapshot-summary parameter adds summary data to the snapshot summary section, as already
|
||||
described in the solution to issue 4942.
|
||||
|
||||
https://github.com/restic/restic/issues/4278
|
||||
https://github.com/restic/restic/issues/4942
|
||||
https://github.com/restic/restic/pull/5185
|
|
@ -2,6 +2,7 @@ package main
|
|||
|
||||
import (
|
||||
"context"
|
||||
"path/filepath"
|
||||
"time"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
|
@ -19,8 +20,9 @@ var cmdRewrite = &cobra.Command{
|
|||
Use: "rewrite [flags] [snapshotID ...]",
|
||||
Short: "Rewrite snapshots to exclude unwanted files",
|
||||
Long: `
|
||||
The "rewrite" command excludes files from existing snapshots. It creates new
|
||||
snapshots containing the same data as the original ones, but without the files
|
||||
The "rewrite" command excludes files from existing snapshots.
|
||||
Alternatively you can use rewrite command to include only wanted files and directories.
|
||||
It creates new snapshots containing the same data as the original ones, but without the files
|
||||
you specify to exclude. All metadata (time, host, tags) will be preserved.
|
||||
|
||||
The snapshots to rewrite are specified using the --host, --tag and --path options,
|
||||
|
@ -35,6 +37,21 @@ Please note that the --forget option only removes the snapshots and not the actu
|
|||
data stored in the repository. In order to delete the no longer referenced data,
|
||||
use the "prune" command.
|
||||
|
||||
The option --snapshot-summary [-s] creates a new snapshot with snapshot summary data attached.
|
||||
Only the two fields TotalFilesProcessed and TotalBytesProcessed are non-zero.
|
||||
|
||||
For the include option to work more efficiently, it os advisable to use the flag
|
||||
'--exclude-empty' so only directories needed will be included from the original
|
||||
snapshot. Otherwise all directories from the original snapshot have to be included.
|
||||
This however will produce an extra Walk() through the original snapshot tree.
|
||||
|
||||
In order to make the include filter work efficiently, an additional read pass through the
|
||||
directory tree is needed to identify the subdirectories and their parents for the
|
||||
inclusion of files to work effectively. Otherwise the full directory tree needs to be included
|
||||
which may contain quite a lot of empty subdirectories. The first read pass
|
||||
avoids this issue, but it might take a bit more time, depending on the network speed of
|
||||
the backend storage and the size of the snapshot.
|
||||
|
||||
EXIT STATUS
|
||||
===========
|
||||
|
||||
|
@ -83,12 +100,15 @@ func (sma snapshotMetadataArgs) convert() (*snapshotMetadata, error) {
|
|||
|
||||
// RewriteOptions collects all options for the rewrite command.
|
||||
type RewriteOptions struct {
|
||||
Forget bool
|
||||
DryRun bool
|
||||
Forget bool
|
||||
DryRun bool
|
||||
SnapshotSummary bool
|
||||
ExcludeEmptyDir bool
|
||||
|
||||
Metadata snapshotMetadataArgs
|
||||
restic.SnapshotFilter
|
||||
filter.ExcludePatternOptions
|
||||
filter.IncludePatternOptions
|
||||
}
|
||||
|
||||
var rewriteOptions RewriteOptions
|
||||
|
@ -101,13 +121,21 @@ func init() {
|
|||
f.BoolVarP(&rewriteOptions.DryRun, "dry-run", "n", false, "do not do anything, just print what would be done")
|
||||
f.StringVar(&rewriteOptions.Metadata.Hostname, "new-host", "", "replace hostname")
|
||||
f.StringVar(&rewriteOptions.Metadata.Time, "new-time", "", "replace time of the backup")
|
||||
f.BoolVarP(&rewriteOptions.SnapshotSummary, "snapshot-summary", "s", false, "create snapshot summary record if it does not exist")
|
||||
f.BoolVarP(&rewriteOptions.ExcludeEmptyDir, "exclude-empty", "X", false, "only for include patterns: exclude empty directories from being created, needs a second walk through the tree")
|
||||
|
||||
initMultiSnapshotFilter(f, &rewriteOptions.SnapshotFilter, true)
|
||||
rewriteOptions.ExcludePatternOptions.Add(f)
|
||||
rewriteOptions.IncludePatternOptions.Add(f)
|
||||
}
|
||||
|
||||
type rewriteFilterFunc func(ctx context.Context, sn *restic.Snapshot) (restic.ID, error)
|
||||
|
||||
type DirectoryNeeded struct {
|
||||
node *restic.Node
|
||||
needed bool
|
||||
}
|
||||
|
||||
func rewriteSnapshot(ctx context.Context, repo *repository.Repository, sn *restic.Snapshot, opts RewriteOptions) (bool, error) {
|
||||
if sn.Tree == nil {
|
||||
return false, errors.Errorf("snapshot %v has nil tree", sn.ID().Str())
|
||||
|
@ -118,12 +146,103 @@ func rewriteSnapshot(ctx context.Context, repo *repository.Repository, sn *resti
|
|||
return false, err
|
||||
}
|
||||
|
||||
includeByNameFuncs, err := opts.IncludePatternOptions.CollectPatterns(Warnf)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
metadata, err := opts.Metadata.convert()
|
||||
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
// walk the complete snapshot tree and memorize the directory structure
|
||||
directoriesNeeded := map[string]DirectoryNeeded{}
|
||||
if opts.ExcludeEmptyDir {
|
||||
err := walker.Walk(ctx, repo, *sn.Tree, walker.WalkVisitor{ProcessNode: func(parentTreeID restic.ID, nodepath string, node *restic.Node, err error) error {
|
||||
if err != nil {
|
||||
Printf("Unable to load tree %s\n ... which belongs to snapshot %s - reason %v\n", parentTreeID, sn.ID().Str(), err)
|
||||
return walker.ErrSkipNode
|
||||
}
|
||||
|
||||
if node == nil {
|
||||
return nil
|
||||
} else if node.Type == restic.NodeTypeDir {
|
||||
directoriesNeeded[nodepath] = DirectoryNeeded{
|
||||
node: node,
|
||||
needed: false,
|
||||
}
|
||||
// filter directories
|
||||
for _, include := range includeByNameFuncs {
|
||||
matched, childMayMatch := include(nodepath)
|
||||
if matched && childMayMatch {
|
||||
parentData := directoriesNeeded[nodepath]
|
||||
if !parentData.needed { // flip 'needed' bit: off->on
|
||||
directoriesNeeded[nodepath] = DirectoryNeeded{
|
||||
node: parentData.node,
|
||||
needed: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else { // include filter processsing - filter file names
|
||||
for _, include := range includeByNameFuncs {
|
||||
if node.Type == restic.NodeTypeFile {
|
||||
matched, childMayMatch := include(nodepath)
|
||||
if matched && childMayMatch {
|
||||
dirpath := filepath.Dir(nodepath) // parent path
|
||||
parentData := directoriesNeeded[dirpath]
|
||||
if !parentData.needed { // flip 'needed' bit: off->on
|
||||
directoriesNeeded[dirpath] = DirectoryNeeded{
|
||||
node: parentData.node,
|
||||
needed: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}}) // end walker.Walk
|
||||
|
||||
if err != nil {
|
||||
Printf("walker.Walk does not want to run for snapshot %s - reason %v\n", sn.ID().Str(), err)
|
||||
return false, err
|
||||
}
|
||||
|
||||
// go over the whole directory structure and find all parent nodes needed
|
||||
for { // ever
|
||||
more := false
|
||||
for dirpath, dirData := range directoriesNeeded {
|
||||
if !dirData.needed {
|
||||
continue
|
||||
}
|
||||
|
||||
parentPath := filepath.Dir(dirpath)
|
||||
// TODO: don't know how this is expressed for Windows
|
||||
if parentPath == "/" {
|
||||
continue
|
||||
}
|
||||
|
||||
value := directoriesNeeded[parentPath]
|
||||
if value.needed {
|
||||
continue
|
||||
}
|
||||
|
||||
directoriesNeeded[parentPath] = DirectoryNeeded{
|
||||
node: value.node,
|
||||
needed: true,
|
||||
}
|
||||
more = true
|
||||
} // all directories in snapshot
|
||||
|
||||
if !more {
|
||||
break
|
||||
}
|
||||
} // for ever
|
||||
} // opts.ExcludeEmptyDir
|
||||
|
||||
var filter rewriteFilterFunc
|
||||
|
||||
if len(rejectByNameFuncs) > 0 {
|
||||
|
@ -152,14 +271,99 @@ func rewriteSnapshot(ctx context.Context, repo *repository.Repository, sn *resti
|
|||
return restic.ID{}, err
|
||||
}
|
||||
ss := querySize()
|
||||
if sn.Summary != nil {
|
||||
sn.Summary.TotalFilesProcessed = ss.FileCount
|
||||
sn.Summary.TotalBytesProcessed = ss.FileSize
|
||||
if sn.Summary == nil { // change of logic: create summary if it wasn't there before
|
||||
sn.Summary = &restic.SnapshotSummary{}
|
||||
}
|
||||
sn.Summary.DataBlobs = ss.DataBlobs
|
||||
sn.Summary.TreeBlobs = ss.TreeBlobs
|
||||
sn.Summary.TotalFilesProcessed = ss.FileCount
|
||||
sn.Summary.TotalBytesProcessed = ss.FileSize
|
||||
return id, err
|
||||
}
|
||||
|
||||
} else if len(includeByNameFuncs) > 0 {
|
||||
selectByName := func(nodepath string, node *restic.Node) bool {
|
||||
for _, include := range includeByNameFuncs {
|
||||
if node.Type == restic.NodeTypeDir {
|
||||
if opts.ExcludeEmptyDir {
|
||||
return directoriesNeeded[nodepath].needed
|
||||
} else {
|
||||
// include directories unconditionally
|
||||
return true
|
||||
}
|
||||
} else if node.Type == restic.NodeTypeFile {
|
||||
ifun, childMayMatch := include(nodepath)
|
||||
if ifun && childMayMatch {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
rewriteNode := func(node *restic.Node, path string) *restic.Node {
|
||||
if selectByName(path, node) {
|
||||
Verboseff("including %s\n", path)
|
||||
return node
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
rewriter, querySize := walker.NewSnapshotSizeRewriter(rewriteNode)
|
||||
|
||||
filter = func(ctx context.Context, sn *restic.Snapshot) (restic.ID, error) {
|
||||
id, err := rewriter.RewriteTree(ctx, repo, "/", *sn.Tree)
|
||||
if err != nil {
|
||||
return restic.ID{}, err
|
||||
}
|
||||
ss := querySize()
|
||||
if sn.Summary == nil {
|
||||
sn.Summary = &restic.SnapshotSummary{}
|
||||
}
|
||||
sn.Summary.DataBlobs = ss.DataBlobs
|
||||
sn.Summary.TreeBlobs = ss.TreeBlobs
|
||||
sn.Summary.TotalFilesProcessed = ss.FileCount
|
||||
sn.Summary.TotalBytesProcessed = ss.FileSize
|
||||
|
||||
return id, nil
|
||||
}
|
||||
|
||||
} else if opts.SnapshotSummary {
|
||||
if sn.Summary != nil {
|
||||
Printf("snapshot %s has already got snapshot summary data\n", sn.ID().Str())
|
||||
return false, nil
|
||||
}
|
||||
|
||||
rewriteNode := func(node *restic.Node, path string) *restic.Node {
|
||||
return node
|
||||
}
|
||||
|
||||
rewriter, querySize := walker.NewSnapshotSizeRewriter(rewriteNode)
|
||||
|
||||
filter = func(ctx context.Context, sn *restic.Snapshot) (restic.ID, error) {
|
||||
id, err := rewriter.RewriteTree(ctx, repo, "/", *sn.Tree)
|
||||
if err != nil {
|
||||
return restic.ID{}, err
|
||||
}
|
||||
ss := querySize()
|
||||
if sn.Summary == nil {
|
||||
sn.Summary = &restic.SnapshotSummary{}
|
||||
}
|
||||
sn.Summary.DataBlobs = ss.DataBlobs
|
||||
sn.Summary.TreeBlobs = ss.TreeBlobs
|
||||
sn.Summary.TotalFilesProcessed = ss.FileCount
|
||||
sn.Summary.TotalBytesProcessed = ss.FileSize
|
||||
Verbosef("dataBlobs %12d\n", ss.DataBlobs)
|
||||
Verbosef("treeBlobs %12d\n", ss.TreeBlobs)
|
||||
Verbosef("totalFilesProcessed %12d\n", ss.FileCount)
|
||||
Verbosef("totalBytesProcessed %12d\n", ss.FileSize)
|
||||
|
||||
return id, nil
|
||||
}
|
||||
|
||||
} else {
|
||||
// TODO: question: should metadata modification be changed so that
|
||||
// snapshot summary data will always be created??
|
||||
filter = func(_ context.Context, sn *restic.Snapshot) (restic.ID, error) {
|
||||
return *sn.Tree, nil
|
||||
}
|
||||
|
@ -203,7 +407,7 @@ func filterAndReplaceSnapshot(ctx context.Context, repo restic.Repository, sn *r
|
|||
return true, nil
|
||||
}
|
||||
|
||||
if filteredTree == *sn.Tree && newMetadata == nil {
|
||||
if filteredTree == *sn.Tree && newMetadata == nil && sn.Summary == nil {
|
||||
debug.Log("Snapshot %v not modified", sn)
|
||||
return false, nil
|
||||
}
|
||||
|
@ -230,6 +434,7 @@ func filterAndReplaceSnapshot(ctx context.Context, repo restic.Repository, sn *r
|
|||
// Always set the original snapshot id as this essentially a new snapshot.
|
||||
sn.Original = sn.ID()
|
||||
sn.Tree = &filteredTree
|
||||
sn.ProgramVersion = version
|
||||
|
||||
if !forget {
|
||||
sn.AddTags([]string{addTag})
|
||||
|
@ -263,8 +468,18 @@ func filterAndReplaceSnapshot(ctx context.Context, repo restic.Repository, sn *r
|
|||
}
|
||||
|
||||
func runRewrite(ctx context.Context, opts RewriteOptions, gopts GlobalOptions, args []string) error {
|
||||
if opts.ExcludePatternOptions.Empty() && opts.Metadata.empty() {
|
||||
return errors.Fatal("Nothing to do: no excludes provided and no new metadata provided")
|
||||
exEmpty := opts.ExcludePatternOptions.Empty()
|
||||
inEmpty := opts.IncludePatternOptions.Empty()
|
||||
if !opts.SnapshotSummary && exEmpty && inEmpty && opts.Metadata.empty() {
|
||||
return errors.Fatal("Nothing to do: no includes/excludes provided and no new metadata provided")
|
||||
}
|
||||
|
||||
if !exEmpty && !inEmpty {
|
||||
return errors.Fatal("You cannot specify include and exclude options simultaneously!")
|
||||
}
|
||||
|
||||
if opts.SnapshotSummary && (!exEmpty || !inEmpty) {
|
||||
Warnf("option --snapshot-summary is ignored with include/exclude options\n")
|
||||
}
|
||||
|
||||
var (
|
||||
|
|
|
@ -25,6 +25,10 @@ func (opts *IncludePatternOptions) Add(f *pflag.FlagSet) {
|
|||
f.StringArrayVar(&opts.InsensitiveIncludeFiles, "iinclude-file", nil, "same as --include-file but ignores casing of `file`names in patterns")
|
||||
}
|
||||
|
||||
func (opts *IncludePatternOptions) Empty() bool {
|
||||
return len(opts.Includes) == 0 && len(opts.InsensitiveIncludes) == 0 && len(opts.IncludeFiles) == 0 && len(opts.InsensitiveIncludeFiles) == 0
|
||||
}
|
||||
|
||||
func (opts IncludePatternOptions) CollectPatterns(warnf func(msg string, args ...interface{})) ([]IncludeByNameFunc, error) {
|
||||
var fs []IncludeByNameFunc
|
||||
if len(opts.IncludeFiles) > 0 {
|
||||
|
|
|
@ -16,6 +16,8 @@ type QueryRewrittenSizeFunc func() SnapshotSize
|
|||
type SnapshotSize struct {
|
||||
FileCount uint
|
||||
FileSize uint64
|
||||
TreeBlobs int
|
||||
DataBlobs int
|
||||
}
|
||||
|
||||
type RewriteOpts struct {
|
||||
|
@ -61,6 +63,7 @@ func NewTreeRewriter(opts RewriteOpts) *TreeRewriter {
|
|||
func NewSnapshotSizeRewriter(rewriteNode NodeRewriteFunc) (*TreeRewriter, QueryRewrittenSizeFunc) {
|
||||
var count uint
|
||||
var size uint64
|
||||
var treeBlobs, dataBlobs int
|
||||
|
||||
t := NewTreeRewriter(RewriteOpts{
|
||||
RewriteNode: func(node *restic.Node, path string) *restic.Node {
|
||||
|
@ -68,6 +71,9 @@ func NewSnapshotSizeRewriter(rewriteNode NodeRewriteFunc) (*TreeRewriter, QueryR
|
|||
if node != nil && node.Type == restic.NodeTypeFile {
|
||||
count++
|
||||
size += node.Size
|
||||
dataBlobs += len(node.Content)
|
||||
} else if node != nil && node.Type == restic.NodeTypeDir {
|
||||
treeBlobs++
|
||||
}
|
||||
return node
|
||||
},
|
||||
|
@ -75,7 +81,7 @@ func NewSnapshotSizeRewriter(rewriteNode NodeRewriteFunc) (*TreeRewriter, QueryR
|
|||
})
|
||||
|
||||
ss := func() SnapshotSize {
|
||||
return SnapshotSize{count, size}
|
||||
return SnapshotSize{count, size, treeBlobs, dataBlobs}
|
||||
}
|
||||
|
||||
return t, ss
|
||||
|
|
Loading…
Reference in a new issue