From 04f172b38d43b957dd6276524532f5b3805a7fd2 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Sat, 11 Jul 2015 16:00:49 +0200 Subject: [PATCH] checker: Add checker and tests --- checker/checker.go | 220 ++++++++++++++++++++++++++++++++++++---- checker/checker_test.go | 83 ++++++++++++++- cmd/restic/cmd_check.go | 18 ++++ test/helpers.go | 15 +++ 4 files changed, 317 insertions(+), 19 deletions(-) diff --git a/checker/checker.go b/checker/checker.go index 1370ffd1d..c449bde9e 100644 --- a/checker/checker.go +++ b/checker/checker.go @@ -2,26 +2,15 @@ package checker import ( "encoding/hex" + "errors" + "fmt" + "github.com/restic/restic" "github.com/restic/restic/backend" "github.com/restic/restic/debug" "github.com/restic/restic/repository" ) -// Error is an error in the repository detected by the checker. -type Error struct { - Message string - Err error -} - -func (e Error) Error() string { - if e.Err != nil { - return e.Message + ": " + e.Err.Error() - } - - return e.Message -} - type mapID [backend.IDSize]byte func id2map(id backend.ID) (mid mapID) { @@ -38,6 +27,14 @@ func str2map(s string) (mid mapID, err error) { return id2map(data), nil } +func map2str(id mapID) string { + return hex.EncodeToString(id[:]) +} + +func map2id(id mapID) backend.ID { + return backend.ID(id[:]) +} + // Checker runs various checks on a repository. It is advisable to create an // exclusive Lock in the repository before running any checks. // @@ -127,10 +124,199 @@ func (c *Checker) LoadIndex() error { } debug.Log("LoadIndex", "done, error %v", perr) + + c.repo.SetIndex(c.masterIndex) + return perr } -// Packs checks that all packs referenced in the index are still available. -func (c *Checker) Packs() error { - return nil +// PackError describes an error with a specific pack. 
+type PackError struct {
+	ID backend.ID
+	error
+}
+
+func (e PackError) Error() string {
+	return "pack " + e.ID.String() + ": " + e.error.Error()
+}
+
+// Packs verifies that every pack listed in c.packs can be found in the
+// backend, and that every data file in the backend is listed in c.packs.
+func (c *Checker) Packs() (errs []error) {
+	debug.Log("Checker.Packs", "checking for %d packs", len(c.packs))
+	indexed := make(map[mapID]struct{})
+
+	for mid := range c.packs {
+		indexed[mid] = struct{}{}
+		packID := map2id(mid)
+		exists, err := c.repo.Backend().Test(backend.Data, map2str(mid))
+		switch {
+		case err != nil:
+			debug.Log("Checker.Packs", "error checking for pack %s", packID.Str())
+			errs = append(errs, PackError{packID, err})
+		case !exists:
+			debug.Log("Checker.Packs", "pack %s does not exist", packID.Str())
+			errs = append(errs, PackError{packID, errors.New("does not exist")})
+		default:
+			debug.Log("Checker.Packs", "pack %s exists", packID.Str())
+		}
+	}
+
+	done := make(chan struct{})
+	defer close(done)
+
+	for id := range c.repo.List(backend.Data, done) {
+		if _, found := indexed[id2map(id)]; found {
+			continue
+		}
+		errs = append(errs, PackError{id, errors.New("not referenced in any index")})
+	}
+
+	return errs
+}
+
+// Error is an error that occurred while checking a repository.
+type Error struct {
+	TreeID backend.ID
+	BlobID backend.ID
+	Err    error
+}
+
+func (e Error) Error() string {
+	if e.BlobID != nil && e.TreeID != nil {
+		msg := "tree " + e.TreeID.String()
+		msg += ", blob " + e.BlobID.String()
+		msg += ": " + e.Err.Error()
+		return msg
+	}
+
+	if e.TreeID != nil {
+		return "tree " + e.TreeID.String() + ": " + e.Err.Error()
+	}
+
+	return e.Err.Error()
+}
+
+func loadTreeFromSnapshot(repo *repository.Repository, id backend.ID) (backend.ID, error) {
+	sn, err := restic.LoadSnapshot(repo, id)
+	if err != nil {
+		debug.Log("Checker.loadTreeFromSnapshot", "error loading snapshot %v: %v", id.Str(), err)
+		return nil, err
+	}
+
+	if sn.Tree == nil {
+		debug.Log("Checker.loadTreeFromSnapshot", "snapshot %v has no tree", id.Str())
+		return nil, fmt.Errorf("snapshot %v has no tree", id)
+	}
+
+	return sn.Tree, nil
+}
+
+// Structure loads the root tree of every snapshot and checks that all trees
+// and blobs reachable from them are known to the checker (c.blobs).
+func (c *Checker) Structure() (errs []error) {
+	done := make(chan struct{})
+	defer close(done)
+
+	var todo backend.IDs
+
+	for id := range c.repo.List(backend.Snapshot, done) {
+		debug.Log("Checker.Structure", "check snapshot %v", id.Str())
+
+		treeID, err := loadTreeFromSnapshot(c.repo, id)
+		if err != nil {
+			errs = append(errs, err)
+			continue
+		}
+
+		debug.Log("Checker.Structure", "snapshot %v has tree %v", id.Str(), treeID.Str())
+		todo = append(todo, treeID)
+	}
+
+	errs = append(errs, c.trees(todo)...)
+	return errs
+}
+
+func (c *Checker) trees(treeIDs backend.IDs) (errs []error) {
+	treesChecked := make(map[mapID]struct{})
+
+	for len(treeIDs) > 0 {
+		id := treeIDs[0]
+		treeIDs = treeIDs[1:]
+
+		c.blobRefs[id2map(id)]++
+		debug.Log("Checker.trees", "tree %v refcount %d", id.Str(), c.blobRefs[id2map(id)])
+
+		if _, ok := treesChecked[id2map(id)]; ok {
+			debug.Log("Checker.trees", "tree %v already checked", id.Str())
+			continue
+		}
+
+		debug.Log("Checker.trees", "check tree %v", id.Str())
+
+		if _, ok := c.blobs[id2map(id)]; !ok {
+			errs = append(errs, Error{TreeID: id, Err: errors.New("not found in index")})
+			continue
+		}
+
+		blobs, subtrees, treeErrors := c.tree(id)
+		if treeErrors != nil {
+			debug.Log("Checker.trees", "error checking tree %v: %v", id.Str(), treeErrors)
+			errs = append(errs, treeErrors...)
+			// fall through: count what tree() did return and mark the tree as checked
+		}
+
+		for _, blobID := range blobs {
+			c.blobRefs[id2map(blobID)]++
+			debug.Log("Checker.trees", "blob %v refcount %d", blobID.Str(), c.blobRefs[id2map(blobID)])
+
+			if _, ok := c.blobs[id2map(blobID)]; !ok {
+				debug.Log("Checker.trees", "tree %v references blob %v which isn't contained in index", id.Str(), blobID.Str())
+
+				errs = append(errs, Error{TreeID: id, BlobID: blobID, Err: errors.New("not found in index")})
+			}
+		}
+
+		treeIDs = append(treeIDs, subtrees...)
+
+		treesChecked[id2map(id)] = struct{}{}
+	}
+
+	return errs
+}
+
+func (c *Checker) tree(id backend.ID) (blobs backend.IDs, subtrees backend.IDs, errs []error) {
+	tree, err := restic.LoadTree(c.repo, id)
+	if err != nil {
+		return nil, nil, []error{Error{TreeID: id, Err: err}}
+	}
+
+	for i, node := range tree.Nodes {
+		switch node.Type {
+		case "file":
+			blobs = append(blobs, node.Content...)
+		case "dir":
+			if node.Subtree == nil {
+				errs = append(errs, Error{TreeID: id, Err: fmt.Errorf("node %d is dir but has no subtree", i)})
+				continue
+			}
+
+			subtrees = append(subtrees, node.Subtree)
+		}
+	}
+
+	return blobs, subtrees, errs
+}
+
+// UnusedBlobs returns all blobs in c.blobs whose reference count is zero.
+func (c *Checker) UnusedBlobs() (blobs backend.IDs) {
+	debug.Log("Checker.UnusedBlobs", "checking %d blobs", len(c.blobs))
+	for id := range c.blobs {
+		if c.blobRefs[id] == 0 {
+			debug.Log("Checker.UnusedBlobs", "blob %v not referenced", map2id(id).Str())
+			blobs = append(blobs, map2id(id))
+		}
+	}
+
+	return blobs
 }
diff --git a/checker/checker_test.go b/checker/checker_test.go
index e4ed826cc..5fe49e22d 100644
--- a/checker/checker_test.go
+++ b/checker/checker_test.go
@@ -2,6 +2,7 @@ package checker_test
 
 import (
 	"path/filepath"
+	"sort"
 	"testing"
 
 	"github.com/restic/restic/backend"
@@ -27,7 +28,85 @@ func TestCheckRepo(t *testing.T) {
 	WithTestEnvironment(t, checkerTestData, func(repodir string) {
 		repo := OpenLocalRepo(t, repodir)
 
-		checker := checker.New(repo)
-		OK(t, checker.LoadIndex())
+		chkr := checker.New(repo)
+		OK(t, chkr.LoadIndex())
+		OKs(t, chkr.Packs())
+		OKs(t, chkr.Structure())
+	})
+}
+
+func TestMissingPack(t *testing.T) {
+	WithTestEnvironment(t, checkerTestData, func(repodir string) {
+		repo := OpenLocalRepo(t, repodir)
+
+		packID := "657f7fb64f6a854fff6fe9279998ee09034901eded4e6db9bcee0e59745bbce6"
+		OK(t, repo.Backend().Remove(backend.Data, packID))
+
+		chkr := checker.New(repo)
+		OK(t, chkr.LoadIndex())
+		errs := chkr.Packs()
+
+		Assert(t, len(errs) == 1,
+			"expected exactly one error, got %v", len(errs))
+
+		if err, ok := errs[0].(checker.PackError); ok {
+			Equals(t, packID, err.ID.String())
+		} else {
+			t.Errorf("expected error returned by checker.Packs() to be PackError, got %v", errs[0])
+		}
+	})
+}
+
+func TestUnreferencedPack(t *testing.T) {
+	WithTestEnvironment(t, checkerTestData, func(repodir string) {
+		repo := OpenLocalRepo(t, repodir)
+
+		// index 8eb5 only references pack 60e0
+		indexID := "8eb5b61062bf8e959f244fba0c971108bc8d4d2a4b236f71a704998e28cc5cf6"
+		packID := "60e0438dcb978ec6860cc1f8c43da648170ee9129af8f650f876bad19f8f788e"
+		OK(t, repo.Backend().Remove(backend.Index, indexID))
+
+		chkr := checker.New(repo)
+		OK(t, chkr.LoadIndex())
+		errs := chkr.Packs()
+
+		Assert(t, len(errs) == 1,
+			"expected exactly one error, got %v", len(errs))
+
+		if err, ok := errs[0].(checker.PackError); ok {
+			Equals(t, packID, err.ID.String())
+		} else {
+			t.Errorf("expected error returned by checker.Packs() to be PackError, got %v", errs[0])
+		}
+	})
+}
+
+func TestUnreferencedBlobs(t *testing.T) {
+	WithTestEnvironment(t, checkerTestData, func(repodir string) {
+		repo := OpenLocalRepo(t, repodir)
+
+		snID := "51d249d28815200d59e4be7b3f21a157b864dc343353df9d8e498220c2499b02"
+		OK(t, repo.Backend().Remove(backend.Snapshot, snID))
+
+		unusedBlobsBySnapshot := backend.IDs{
+			ParseID("58c748bbe2929fdf30c73262bd8313fe828f8925b05d1d4a87fe109082acb849"),
+			ParseID("988a272ab9768182abfd1fe7d7a7b68967825f0b861d3b36156795832c772235"),
+			ParseID("c01952de4d91da1b1b80bc6e06eaa4ec21523f4853b69dc8231708b9b7ec62d8"),
+			ParseID("bec3a53d7dc737f9a9bee68b107ec9e8ad722019f649b34d474b9982c3a3fec7"),
+			ParseID("2a6f01e5e92d8343c4c6b78b51c5a4dc9c39d42c04e26088c7614b13d8d0559d"),
+			ParseID("18b51b327df9391732ba7aaf841a4885f350d8a557b2da8352c9acf8898e3f10"),
+		}
+
+		sort.Sort(unusedBlobsBySnapshot)
+
+		chkr := checker.New(repo)
+		OK(t, chkr.LoadIndex())
+		OKs(t, chkr.Packs())
+		OKs(t, chkr.Structure())
+
+		blobs := chkr.UnusedBlobs()
+		sort.Sort(blobs)
+
+		Equals(t, unusedBlobsBySnapshot, blobs)
 	})
 }
diff --git a/cmd/restic/cmd_check.go b/cmd/restic/cmd_check.go
index 7f01d0d54..a40cf2fd1 100644
--- a/cmd/restic/cmd_check.go
+++ b/cmd/restic/cmd_check.go
@@ -2,6 +2,8 @@ package main
 
 import (
 	"errors"
+	"fmt"
+	"os"
 
 	"github.com/restic/restic/checker"
 )
@@ -50,5 +52,21 @@ func (cmd CmdCheck) Execute(args []string) error {
 		return err
 	}
 
+	errorsFound := false
+	cmd.global.Verbosef("Check all packs\n")
+	for _, err := range checker.Packs() {
+		errorsFound = true
+		fmt.Fprintf(os.Stderr, "error: %v\n", err)
+	}
+
+	cmd.global.Verbosef("Check snapshots, trees and blobs\n")
+	for _, err := range checker.Structure() {
+		errorsFound = true
+		fmt.Fprintf(os.Stderr, "error: %v\n", err)
+	}
+
+	if errorsFound {
+		return errors.New("repository contains errors")
+	}
 	return nil
 }
diff --git a/test/helpers.go b/test/helpers.go
index 544195d9b..a9306cf3a 100644
--- a/test/helpers.go
+++ b/test/helpers.go
@@ -35,6 +35,21 @@ func OK(tb testing.TB, err error) {
 	}
 }
 
+// OKs fails the test if any error from errs is not nil.
+func OKs(tb testing.TB, errs []error) {
+	errFound := false
+	for _, err := range errs {
+		if err != nil {
+			errFound = true
+			_, file, line, _ := runtime.Caller(1)
+			fmt.Printf("\033[31m%s:%d: unexpected error: %s\033[39m\n\n", filepath.Base(file), line, err.Error())
+		}
+	}
+	if errFound {
+		tb.FailNow()
+	}
+}
+
 // Equals fails the test if exp is not equal to act.
 func Equals(tb testing.TB, exp, act interface{}) {
 	if !reflect.DeepEqual(exp, act) {