From 8b8bd4e8aceae36b8d78a658d9ccf143ef752aca Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 10 Apr 2022 14:11:01 +0200 Subject: [PATCH 1/6] check: complain about mixed pack files --- cmd/restic/cmd_check.go | 7 +++++ internal/checker/checker.go | 14 ++++++++++ internal/checker/checker_test.go | 44 +++++++++++++++----------------- 3 files changed, 41 insertions(+), 24 deletions(-) diff --git a/cmd/restic/cmd_check.go b/cmd/restic/cmd_check.go index 1bc9da687..093a1ac99 100644 --- a/cmd/restic/cmd_check.go +++ b/cmd/restic/cmd_check.go @@ -221,11 +221,15 @@ func runCheck(opts CheckOptions, gopts GlobalOptions, args []string) error { errorsFound := false suggestIndexRebuild := false + mixedFound := false for _, hint := range hints { switch hint.(type) { case *checker.ErrDuplicatePacks, *checker.ErrOldIndexFormat: Printf("%v\n", hint) suggestIndexRebuild = true + case *checker.ErrMixedPack: + Printf("%v\n", hint) + mixedFound = true default: Warnf("error: %v\n", hint) errorsFound = true @@ -235,6 +239,9 @@ func runCheck(opts CheckOptions, gopts GlobalOptions, args []string) error { if suggestIndexRebuild { Printf("This is non-critical, you can run `restic rebuild-index' to correct this\n") } + if mixedFound { + Printf("Mixed packs with tree and data blobs are non-critical, you can run `restic prune` to correct this.\n") + } if len(errs) > 0 { for _, err := range errs { diff --git a/internal/checker/checker.go b/internal/checker/checker.go index b972408f5..e479f0aff 100644 --- a/internal/checker/checker.go +++ b/internal/checker/checker.go @@ -66,6 +66,15 @@ func (e *ErrDuplicatePacks) Error() string { return fmt.Sprintf("pack %v contained in several indexes: %v", e.PackID, e.Indexes) } +// ErrMixedPack is returned when a pack is found that contains both tree and data blobs. +type ErrMixedPack struct { + PackID restic.ID +} + +func (e *ErrMixedPack) Error() string { + return fmt.Sprintf("pack %v contains a mix of tree and data blobs", e.PackID.Str()) +} + // ErrOldIndexFormat is returned when an index with the old format is // found. type ErrOldIndexFormat struct { @@ -141,6 +150,11 @@ func (c *Checker) LoadIndex(ctx context.Context) (hints []error, errs []error) { Indexes: packToIndex[packID], }) } + if c.masterIndex.IsMixedPack(packID) { + hints = append(hints, &ErrMixedPack{ + PackID: packID, + }) + } } err = c.repo.SetIndex(c.masterIndex) diff --git a/internal/checker/checker_test.go b/internal/checker/checker_test.go index 975415a87..c82375e3c 100644 --- a/internal/checker/checker_test.go +++ b/internal/checker/checker_test.go @@ -63,6 +63,14 @@ func checkData(chkr *checker.Checker) []error { ) } +func assertOnlyMixedPackHints(t *testing.T, hints []error) { + for _, err := range hints { + if _, ok := err.(*checker.ErrMixedPack); !ok { + t.Fatalf("expected mixed pack hint, got %v", err) + } + } +} + func TestCheckRepo(t *testing.T) { repodir, cleanup := test.Env(t, checkerTestData) defer cleanup() @@ -74,9 +82,9 @@ func TestCheckRepo(t *testing.T) { if len(errs) > 0 { t.Fatalf("expected no errors, got %v: %v", len(errs), errs) } - - if len(hints) > 0 { - t.Errorf("expected no hints, got %v: %v", len(hints), hints) + assertOnlyMixedPackHints(t, hints) + if len(hints) == 0 { + t.Fatal("expected mixed pack warnings, got none") } test.OKs(t, checkPacks(chkr)) @@ -100,10 +108,7 @@ func TestMissingPack(t *testing.T) { if len(errs) > 0 { t.Fatalf("expected no errors, got %v: %v", len(errs), errs) } - - if len(hints) > 0 { - t.Errorf("expected no hints, got %v: %v", len(hints), hints) - } + assertOnlyMixedPackHints(t, hints) errs = checkPacks(chkr) @@ -136,10 +141,7 @@ func TestUnreferencedPack(t *testing.T) { if len(errs) > 0 { t.Fatalf("expected no errors, got %v: %v", len(errs), errs) } - - if len(hints) > 0 { - t.Errorf("expected no hints, got %v: %v", len(hints), hints) - } + assertOnlyMixedPackHints(t, hints) errs = checkPacks(chkr) @@ -181,10 +183,7 @@ func TestUnreferencedBlobs(t *testing.T) { if len(errs) > 0 { t.Fatalf("expected no errors, got %v: %v", len(errs), errs) } - - if len(hints) > 0 { - t.Errorf("expected no hints, got %v: %v", len(hints), hints) - } + assertOnlyMixedPackHints(t, hints) test.OKs(t, checkPacks(chkr)) test.OKs(t, checkStruct(chkr)) @@ -269,9 +268,7 @@ func TestModifiedIndex(t *testing.T) { t.Logf("found expected error %v", err) } - if len(hints) > 0 { - t.Errorf("expected no hints, got %v: %v", len(hints), hints) - } + assertOnlyMixedPackHints(t, hints) } var checkerDuplicateIndexTestData = filepath.Join("testdata", "duplicate-packs-in-index-test-repo.tar.gz") @@ -421,10 +418,7 @@ func TestCheckerNoDuplicateTreeDecodes(t *testing.T) { if len(errs) > 0 { t.Fatalf("expected no errors, got %v: %v", len(errs), errs) } - - if len(hints) > 0 { - t.Errorf("expected no hints, got %v: %v", len(hints), hints) - } + assertOnlyMixedPackHints(t, hints) test.OKs(t, checkPacks(chkr)) test.OKs(t, checkStruct(chkr)) @@ -572,8 +566,10 @@ func loadBenchRepository(t *testing.B) (*checker.Checker, restic.Repository, fun t.Fatalf("expected no errors, got %v: %v", len(errs), errs) } - if len(hints) > 0 { - t.Errorf("expected no hints, got %v: %v", len(hints), hints) + for _, err := range hints { + if _, ok := err.(*checker.ErrMixedPack); !ok { + t.Fatalf("expected mixed pack hint, got %v", err) + } } return chkr, repo, cleanup } From fcb3ddf181b684142fd43a67e2b35c38056d2f33 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 10 Apr 2022 14:11:48 +0200 Subject: [PATCH 2/6] check: Complain about usage of s3 legacy layout --- cmd/restic/cmd_check.go | 8 +++++--- internal/checker/checker.go | 21 +++++++++++++++++++++ 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/cmd/restic/cmd_check.go b/cmd/restic/cmd_check.go index 093a1ac99..e32a4a4d0 100644 --- a/cmd/restic/cmd_check.go +++ b/cmd/restic/cmd_check.go @@ -260,10 +260,12 @@ func runCheck(opts CheckOptions, gopts GlobalOptions, args []string) error { if checker.IsOrphanedPack(err) { orphanedPacks++ Verbosef("%v\n", err) - continue + } else if _, ok := err.(*checker.ErrLegacyLayout); ok { + Verbosef("repository still uses the S3 legacy layout\nPlease run `restic migrate s3legacy` to correct this.\n") + } else { + errorsFound = true + Warnf("%v\n", err) } - errorsFound = true - Warnf("error: %v\n", err) } if orphanedPacks > 0 { diff --git a/internal/checker/checker.go b/internal/checker/checker.go index e479f0aff..0c4d84f94 100644 --- a/internal/checker/checker.go +++ b/internal/checker/checker.go @@ -13,6 +13,7 @@ import ( "github.com/minio/sha256-simd" "github.com/restic/restic/internal/backend" + "github.com/restic/restic/internal/backend/s3" "github.com/restic/restic/internal/debug" "github.com/restic/restic/internal/errors" "github.com/restic/restic/internal/hashing" @@ -56,6 +57,13 @@ func New(repo restic.Repository, trackUnused bool) *Checker { return c } +// ErrLegacyLayout is returned when the repository uses the S3 legacy layout. +type ErrLegacyLayout struct{} + +func (e *ErrLegacyLayout) Error() string { + return "repository uses S3 legacy layout" +} + // ErrDuplicatePacks is returned when a pack is found in more than one index. type ErrDuplicatePacks struct { PackID restic.ID @@ -184,12 +192,25 @@ func IsOrphanedPack(err error) bool { return errors.As(err, &e) && e.Orphaned } +func isS3Legacy(b restic.Backend) bool { + be, ok := b.(*s3.Backend) + if !ok { + return false + } + + return be.Layout.Name() == "s3legacy" +} + // Packs checks that all packs referenced in the index are still available and // there are no packs that aren't in an index. errChan is closed after all // packs have been checked. func (c *Checker) Packs(ctx context.Context, errChan chan<- error) { defer close(errChan) + if isS3Legacy(c.repo.Backend()) { + errChan <- &ErrLegacyLayout{} + } + debug.Log("checking for %d packs", len(c.packs)) debug.Log("listing repository packs") From 768c890fcbbfa3512fc3084352b1b583f8518d06 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 10 Apr 2022 14:20:03 +0200 Subject: [PATCH 3/6] check: Deprecate `--check-unused` Unused blobs are not a problem but rather expected to exist now that prune by default does not remove every unused blob. However, the option has caused questions from users whether a repository is damaged or not, so just remove that option. Note that the remaining code is left intact as it is still useful for our test cases. --- cmd/restic/cmd_check.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/cmd/restic/cmd_check.go b/cmd/restic/cmd_check.go index e32a4a4d0..60c4e19c5 100644 --- a/cmd/restic/cmd_check.go +++ b/cmd/restic/cmd_check.go @@ -57,7 +57,13 @@ func init() { f := cmdCheck.Flags() f.BoolVar(&checkOptions.ReadData, "read-data", false, "read all data blobs") f.StringVar(&checkOptions.ReadDataSubset, "read-data-subset", "", "read a `subset` of data packs, specified as 'n/t' for specific part, or either 'x%' or 'x.y%' or a size in bytes with suffixes k/K, m/M, g/G, t/T for a random subset") - f.BoolVar(&checkOptions.CheckUnused, "check-unused", false, "find unused blobs") + var ignored bool + f.BoolVar(&ignored, "check-unused", false, "find unused blobs") + err := f.MarkDeprecated("check-unused", "`--check-unused` is deprecated and will be ignored") + if err != nil { + // MarkDeprecated only returns an error when the flag is not found + panic(err) + } f.BoolVar(&checkOptions.WithCache, "with-cache", false, "use the cache") } From 04e49924fb5165862973e3b61650abfb83f54970 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 1 May 2022 14:43:26 +0200 Subject: [PATCH 4/6] checker: Fix S3 legacy layout detection --- internal/checker/checker.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/internal/checker/checker.go b/internal/checker/checker.go index 0c4d84f94..0e4310c95 100644 --- a/internal/checker/checker.go +++ b/internal/checker/checker.go @@ -14,6 +14,7 @@ import ( "github.com/minio/sha256-simd" "github.com/restic/restic/internal/backend" "github.com/restic/restic/internal/backend/s3" + "github.com/restic/restic/internal/cache" "github.com/restic/restic/internal/debug" "github.com/restic/restic/internal/errors" "github.com/restic/restic/internal/hashing" @@ -193,6 +194,11 @@ func IsOrphanedPack(err error) bool { } func isS3Legacy(b restic.Backend) bool { + // unwrap cache + if be, ok := b.(*cache.Backend); ok { + b = be.Backend + } + be, ok := b.(*s3.Backend) if !ok { return false From 5a6f2f9fa059fea1b7de697f2dcb88781d80f23e Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 1 May 2022 14:52:00 +0200 Subject: [PATCH 5/6] Fix S3 legacy layout migration --- internal/migrations/s3_layout.go | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/internal/migrations/s3_layout.go b/internal/migrations/s3_layout.go index 0cfd40e60..afb14b848 100644 --- a/internal/migrations/s3_layout.go +++ b/internal/migrations/s3_layout.go @@ -8,6 +8,7 @@ import ( "github.com/restic/restic/internal/backend" "github.com/restic/restic/internal/backend/s3" + "github.com/restic/restic/internal/cache" "github.com/restic/restic/internal/debug" "github.com/restic/restic/internal/errors" "github.com/restic/restic/internal/restic" @@ -21,10 +22,25 @@ func init() { // "default" layout. type S3Layout struct{} +func toS3Backend(repo restic.Repository) *s3.Backend { + b := repo.Backend() + // unwrap cache + if be, ok := b.(*cache.Backend); ok { + b = be.Backend + } + + be, ok := b.(*s3.Backend) + if !ok { + debug.Log("backend is not s3") + return nil + } + return be +} + // Check tests whether the migration can be applied. func (m *S3Layout) Check(ctx context.Context, repo restic.Repository) (bool, error) { - be, ok := repo.Backend().(*s3.Backend) - if !ok { + be := toS3Backend(repo) + if be == nil { debug.Log("backend is not s3") return false, nil } @@ -75,8 +91,8 @@ func (m *S3Layout) moveFiles(ctx context.Context, be *s3.Backend, l backend.Layo // Apply runs the migration. func (m *S3Layout) Apply(ctx context.Context, repo restic.Repository) error { - be, ok := repo.Backend().(*s3.Backend) - if !ok { + be := toS3Backend(repo) + if be == nil { debug.Log("backend is not s3") return errors.New("backend is not s3") } From 3bf53da672db74c7df3ddd992d48245079d09b36 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sat, 23 Jul 2022 11:21:26 +0200 Subject: [PATCH 6/6] Add changelog for stricter checks --- changelog/unreleased/issue-3295 | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 changelog/unreleased/issue-3295 diff --git a/changelog/unreleased/issue-3295 b/changelog/unreleased/issue-3295 new file mode 100644 index 000000000..155612da0 --- /dev/null +++ b/changelog/unreleased/issue-3295 @@ -0,0 +1,12 @@ +Change: Deprecate `check --check-unused` and add further checks + +Since restic 0.12.0, it is expected to still have unused blobs after running +`prune`. This made the `check --check-unused` rather useless and tended to +confuse users. The options has been deprecated and is now ignored. + +`check` now also warns if a repository is using either the legacy S3 layout or +mixed pack files with both tree and data blobs. The latter is known to cause +performance problems. + +https://github.com/restic/restic/issues/3295 +https://github.com/restic/restic/pull/3730