2015-06-28 22:22:25 +00:00
|
|
|
package checker
|
|
|
|
|
|
|
|
import (
|
2021-08-20 14:15:40 +00:00
|
|
|
"bufio"
|
2017-06-04 09:16:55 +00:00
|
|
|
"context"
|
2015-07-11 14:00:49 +00:00
|
|
|
"fmt"
|
2021-08-07 22:38:17 +00:00
|
|
|
"runtime"
|
2015-07-11 22:25:42 +00:00
|
|
|
"sync"
|
2015-06-28 22:22:25 +00:00
|
|
|
|
2023-12-31 09:58:26 +00:00
|
|
|
"github.com/klauspost/compress/zstd"
|
2021-11-07 21:33:44 +00:00
|
|
|
"github.com/restic/restic/internal/backend"
|
2022-04-10 12:11:48 +00:00
|
|
|
"github.com/restic/restic/internal/backend/s3"
|
2017-07-23 12:21:03 +00:00
|
|
|
"github.com/restic/restic/internal/debug"
|
2018-10-28 20:12:15 +00:00
|
|
|
"github.com/restic/restic/internal/errors"
|
2017-07-23 12:21:03 +00:00
|
|
|
"github.com/restic/restic/internal/repository"
|
2024-05-24 21:06:44 +00:00
|
|
|
"github.com/restic/restic/internal/repository/index"
|
2024-05-24 21:09:58 +00:00
|
|
|
"github.com/restic/restic/internal/repository/pack"
|
2018-10-28 20:12:15 +00:00
|
|
|
"github.com/restic/restic/internal/restic"
|
2020-11-04 13:11:29 +00:00
|
|
|
"github.com/restic/restic/internal/ui/progress"
|
2018-10-28 20:12:15 +00:00
|
|
|
"golang.org/x/sync/errgroup"
|
2015-06-28 22:22:25 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// Checker runs various checks on a repository. It is advisable to create an
|
|
|
|
// exclusive Lock in the repository before running any checks.
|
|
|
|
//
|
|
|
|
// A Checker only tests for internal errors within the data structures of the
|
|
|
|
// repository (e.g. missing blobs), and needs a valid Repository to work on.
|
|
|
|
type Checker struct {
|
2020-11-10 07:16:47 +00:00
|
|
|
packs map[restic.ID]int64
|
2015-07-12 14:42:22 +00:00
|
|
|
blobRefs struct {
|
|
|
|
sync.Mutex
|
2020-11-06 22:41:04 +00:00
|
|
|
M restic.BlobSet
|
2015-07-12 14:42:22 +00:00
|
|
|
}
|
2020-11-06 23:07:32 +00:00
|
|
|
trackUnused bool
|
2015-06-28 22:22:25 +00:00
|
|
|
|
2022-06-12 12:43:43 +00:00
|
|
|
masterIndex *index.MasterIndex
|
2023-10-01 11:05:56 +00:00
|
|
|
snapshots restic.Lister
|
2015-06-28 22:22:25 +00:00
|
|
|
|
2016-08-31 21:07:50 +00:00
|
|
|
repo restic.Repository
|
2015-06-28 22:22:25 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// New returns a new checker which runs on repo.
|
2020-11-06 23:07:32 +00:00
|
|
|
func New(repo restic.Repository, trackUnused bool) *Checker {
|
2015-07-12 14:42:22 +00:00
|
|
|
c := &Checker{
|
2020-11-10 07:16:47 +00:00
|
|
|
packs: make(map[restic.ID]int64),
|
2022-06-12 12:43:43 +00:00
|
|
|
masterIndex: index.NewMasterIndex(),
|
2015-06-28 22:22:25 +00:00
|
|
|
repo: repo,
|
2020-11-06 23:07:32 +00:00
|
|
|
trackUnused: trackUnused,
|
2015-06-28 22:22:25 +00:00
|
|
|
}
|
2015-07-12 14:42:22 +00:00
|
|
|
|
2020-11-06 22:41:04 +00:00
|
|
|
c.blobRefs.M = restic.NewBlobSet()
|
2015-07-12 14:42:22 +00:00
|
|
|
|
|
|
|
return c
|
2015-06-28 22:22:25 +00:00
|
|
|
}
|
|
|
|
|
2022-04-10 12:11:48 +00:00
|
|
|
// ErrLegacyLayout is returned when the repository uses the S3 legacy layout.
|
2022-11-23 20:12:06 +00:00
|
|
|
var ErrLegacyLayout = errors.New("repository uses S3 legacy layout")
|
2022-04-10 12:11:48 +00:00
|
|
|
|
2015-10-25 15:00:06 +00:00
|
|
|
// ErrDuplicatePacks is returned when a pack is found in more than one index.
|
|
|
|
type ErrDuplicatePacks struct {
|
2016-08-31 20:39:36 +00:00
|
|
|
PackID restic.ID
|
|
|
|
Indexes restic.IDSet
|
2015-10-25 15:00:06 +00:00
|
|
|
}
|
|
|
|
|
2022-05-01 18:08:02 +00:00
|
|
|
func (e *ErrDuplicatePacks) Error() string {
|
2022-04-30 18:27:31 +00:00
|
|
|
return fmt.Sprintf("pack %v contained in several indexes: %v", e.PackID, e.Indexes)
|
2015-10-25 15:00:06 +00:00
|
|
|
}
|
|
|
|
|
2022-04-10 12:11:01 +00:00
|
|
|
// ErrMixedPack is returned when a pack is found that contains both tree and data blobs.
|
|
|
|
type ErrMixedPack struct {
|
|
|
|
PackID restic.ID
|
|
|
|
}
|
|
|
|
|
|
|
|
func (e *ErrMixedPack) Error() string {
|
|
|
|
return fmt.Sprintf("pack %v contains a mix of tree and data blobs", e.PackID.Str())
|
|
|
|
}
|
|
|
|
|
2015-11-08 20:50:48 +00:00
|
|
|
// ErrOldIndexFormat is returned when an index with the old format is
|
|
|
|
// found.
|
|
|
|
type ErrOldIndexFormat struct {
|
2016-08-31 20:39:36 +00:00
|
|
|
restic.ID
|
2015-11-08 20:50:48 +00:00
|
|
|
}
|
|
|
|
|
2022-05-01 18:08:02 +00:00
|
|
|
func (err *ErrOldIndexFormat) Error() string {
|
2022-04-30 18:27:31 +00:00
|
|
|
return fmt.Sprintf("index %v has old format", err.ID)
|
2015-11-08 20:50:48 +00:00
|
|
|
}
|
|
|
|
|
2021-11-07 21:33:44 +00:00
|
|
|
func (c *Checker) LoadSnapshots(ctx context.Context) error {
|
|
|
|
var err error
|
2023-10-01 11:05:56 +00:00
|
|
|
c.snapshots, err = restic.MemorizeList(ctx, c.repo, restic.SnapshotFile)
|
2021-11-07 21:33:44 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2024-05-19 10:41:56 +00:00
|
|
|
func computePackTypes(ctx context.Context, idx restic.ListBlobser) (map[restic.ID]restic.BlobType, error) {
|
2022-06-12 12:45:42 +00:00
|
|
|
packs := make(map[restic.ID]restic.BlobType)
|
2024-05-19 10:41:56 +00:00
|
|
|
err := idx.ListBlobs(ctx, func(pb restic.PackedBlob) {
|
2022-06-12 12:45:42 +00:00
|
|
|
tpe, exists := packs[pb.PackID]
|
|
|
|
if exists {
|
|
|
|
if pb.Type != tpe {
|
|
|
|
tpe = restic.InvalidBlob
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
tpe = pb.Type
|
|
|
|
}
|
|
|
|
packs[pb.PackID] = tpe
|
|
|
|
})
|
2024-04-05 20:20:14 +00:00
|
|
|
return packs, err
|
2022-06-12 12:45:42 +00:00
|
|
|
}
|
|
|
|
|
2015-06-28 22:22:25 +00:00
|
|
|
// LoadIndex loads all index files.
|
2023-10-01 17:48:56 +00:00
|
|
|
func (c *Checker) LoadIndex(ctx context.Context, p *progress.Counter) (hints []error, errs []error) {
|
2016-09-27 20:35:08 +00:00
|
|
|
debug.Log("Start")
|
2019-03-24 20:27:28 +00:00
|
|
|
|
2020-12-18 18:37:08 +00:00
|
|
|
packToIndex := make(map[restic.ID]restic.IDSet)
|
2024-05-19 13:37:54 +00:00
|
|
|
err := c.masterIndex.Load(ctx, c.repo, p, func(id restic.ID, idx *index.Index, oldFormat bool, err error) error {
|
2020-12-18 18:37:08 +00:00
|
|
|
debug.Log("process index %v, err %v", id, err)
|
2019-03-24 20:27:28 +00:00
|
|
|
|
2020-12-18 18:37:08 +00:00
|
|
|
if oldFormat {
|
2022-04-30 18:27:31 +00:00
|
|
|
debug.Log("index %v has old format", id)
|
2022-05-01 18:08:02 +00:00
|
|
|
hints = append(hints, &ErrOldIndexFormat{id})
|
2020-12-18 18:37:08 +00:00
|
|
|
}
|
2015-06-28 22:22:25 +00:00
|
|
|
|
2022-04-30 18:27:31 +00:00
|
|
|
err = errors.Wrapf(err, "error loading index %v", id)
|
2015-06-28 22:22:25 +00:00
|
|
|
|
2020-12-18 18:37:08 +00:00
|
|
|
if err != nil {
|
|
|
|
errs = append(errs, err)
|
2019-03-24 20:27:28 +00:00
|
|
|
return nil
|
|
|
|
}
|
2015-10-25 15:00:06 +00:00
|
|
|
|
2020-12-18 18:37:08 +00:00
|
|
|
debug.Log("process blobs")
|
|
|
|
cnt := 0
|
2024-05-19 13:37:54 +00:00
|
|
|
err = idx.Each(ctx, func(blob restic.PackedBlob) {
|
2020-12-18 18:37:08 +00:00
|
|
|
cnt++
|
2019-03-24 20:27:28 +00:00
|
|
|
|
2020-12-18 18:37:08 +00:00
|
|
|
if _, ok := packToIndex[blob.PackID]; !ok {
|
|
|
|
packToIndex[blob.PackID] = restic.NewIDSet()
|
2015-10-25 15:00:06 +00:00
|
|
|
}
|
2020-12-18 18:37:08 +00:00
|
|
|
packToIndex[blob.PackID].Insert(id)
|
2022-08-19 18:04:39 +00:00
|
|
|
})
|
2020-12-18 18:37:08 +00:00
|
|
|
|
|
|
|
debug.Log("%d blobs processed", cnt)
|
2024-04-05 20:20:14 +00:00
|
|
|
return err
|
2019-03-24 20:27:28 +00:00
|
|
|
})
|
|
|
|
if err != nil {
|
2024-05-19 13:37:54 +00:00
|
|
|
// failed to load the index
|
2021-01-30 15:35:05 +00:00
|
|
|
return hints, append(errs, err)
|
|
|
|
}
|
2020-11-10 07:16:47 +00:00
|
|
|
|
2024-05-19 10:41:56 +00:00
|
|
|
err = c.repo.SetIndex(c.masterIndex)
|
|
|
|
if err != nil {
|
|
|
|
debug.Log("SetIndex returned error: %v", err)
|
|
|
|
errs = append(errs, err)
|
|
|
|
}
|
|
|
|
|
2020-11-10 07:16:47 +00:00
|
|
|
// compute pack size using index entries
|
2024-05-19 10:41:56 +00:00
|
|
|
c.packs, err = pack.Size(ctx, c.repo, false)
|
2024-04-05 20:20:14 +00:00
|
|
|
if err != nil {
|
|
|
|
return hints, append(errs, err)
|
|
|
|
}
|
2024-05-19 10:41:56 +00:00
|
|
|
packTypes, err := computePackTypes(ctx, c.repo)
|
2024-04-05 20:20:14 +00:00
|
|
|
if err != nil {
|
|
|
|
return hints, append(errs, err)
|
|
|
|
}
|
2020-11-10 07:16:47 +00:00
|
|
|
|
2016-09-27 20:35:08 +00:00
|
|
|
debug.Log("checking for duplicate packs")
|
2015-10-25 15:00:06 +00:00
|
|
|
for packID := range c.packs {
|
2018-01-25 19:49:41 +00:00
|
|
|
debug.Log(" check pack %v: contained in %d indexes", packID, len(packToIndex[packID]))
|
2015-10-25 15:00:06 +00:00
|
|
|
if len(packToIndex[packID]) > 1 {
|
2022-05-01 18:08:02 +00:00
|
|
|
hints = append(hints, &ErrDuplicatePacks{
|
2015-10-25 15:00:06 +00:00
|
|
|
PackID: packID,
|
|
|
|
Indexes: packToIndex[packID],
|
2015-10-25 15:26:50 +00:00
|
|
|
})
|
2015-10-25 15:00:06 +00:00
|
|
|
}
|
2022-06-12 12:45:42 +00:00
|
|
|
if packTypes[packID] == restic.InvalidBlob {
|
2022-04-10 12:11:01 +00:00
|
|
|
hints = append(hints, &ErrMixedPack{
|
|
|
|
PackID: packID,
|
|
|
|
})
|
|
|
|
}
|
2015-10-25 15:00:06 +00:00
|
|
|
}
|
|
|
|
|
2015-10-25 15:26:50 +00:00
|
|
|
return hints, errs
|
2015-06-28 22:22:25 +00:00
|
|
|
}
|
|
|
|
|
2015-07-11 14:00:49 +00:00
|
|
|
// PackError describes an error with a specific pack.
|
|
|
|
type PackError struct {
|
2016-08-31 20:39:36 +00:00
|
|
|
ID restic.ID
|
2015-07-12 15:09:48 +00:00
|
|
|
Orphaned bool
|
|
|
|
Err error
|
2015-07-11 14:00:49 +00:00
|
|
|
}
|
|
|
|
|
2022-06-13 18:35:37 +00:00
|
|
|
func (e *PackError) Error() string {
|
2022-04-30 18:27:31 +00:00
|
|
|
return "pack " + e.ID.String() + ": " + e.Err.Error()
|
2018-04-07 08:07:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// IsOrphanedPack returns true if the error describes a pack which is not
|
|
|
|
// contained in any index.
|
|
|
|
func IsOrphanedPack(err error) bool {
|
2022-06-13 18:35:37 +00:00
|
|
|
var e *PackError
|
|
|
|
return errors.As(err, &e) && e.Orphaned
|
2015-07-11 14:00:49 +00:00
|
|
|
}
|
|
|
|
|
2023-10-01 09:40:12 +00:00
|
|
|
func isS3Legacy(b backend.Backend) bool {
|
2024-01-19 22:32:17 +00:00
|
|
|
be := backend.AsBackend[*s3.Backend](b)
|
|
|
|
return be != nil && be.Layout.Name() == "s3legacy"
|
2022-04-10 12:11:48 +00:00
|
|
|
}
|
|
|
|
|
2015-07-11 22:25:42 +00:00
|
|
|
// Packs checks that all packs referenced in the index are still available and
|
2015-07-11 23:44:19 +00:00
|
|
|
// there are no packs that aren't in an index. errChan is closed after all
|
|
|
|
// packs have been checked.
|
2017-06-04 09:16:55 +00:00
|
|
|
func (c *Checker) Packs(ctx context.Context, errChan chan<- error) {
|
2015-07-11 23:44:19 +00:00
|
|
|
defer close(errChan)
|
|
|
|
|
2024-05-10 13:55:45 +00:00
|
|
|
if r, ok := c.repo.(*repository.Repository); ok {
|
|
|
|
if isS3Legacy(repository.AsS3Backend(r)) {
|
|
|
|
errChan <- ErrLegacyLayout
|
|
|
|
}
|
2022-04-10 12:11:48 +00:00
|
|
|
}
|
|
|
|
|
2016-09-27 20:35:08 +00:00
|
|
|
debug.Log("checking for %d packs", len(c.packs))
|
2015-07-11 22:25:42 +00:00
|
|
|
|
2018-01-12 02:00:48 +00:00
|
|
|
debug.Log("listing repository packs")
|
2020-11-10 07:16:47 +00:00
|
|
|
repoPacks := make(map[restic.ID]int64)
|
2018-01-21 16:25:36 +00:00
|
|
|
|
2020-08-16 09:16:38 +00:00
|
|
|
err := c.repo.List(ctx, restic.PackFile, func(id restic.ID, size int64) error {
|
2020-11-10 07:16:47 +00:00
|
|
|
repoPacks[id] = size
|
2018-01-21 16:25:36 +00:00
|
|
|
return nil
|
|
|
|
})
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
errChan <- err
|
2015-07-11 22:25:42 +00:00
|
|
|
}
|
|
|
|
|
2020-11-10 07:16:47 +00:00
|
|
|
for id, size := range c.packs {
|
|
|
|
reposize, ok := repoPacks[id]
|
|
|
|
// remove from repoPacks so we can find orphaned packs
|
|
|
|
delete(repoPacks, id)
|
|
|
|
|
|
|
|
// missing: present in c.packs but not in the repo
|
|
|
|
if !ok {
|
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
return
|
2022-06-13 18:35:37 +00:00
|
|
|
case errChan <- &PackError{ID: id, Err: errors.New("does not exist")}:
|
2020-11-10 07:16:47 +00:00
|
|
|
}
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// size not matching: present in c.packs and in the repo, but sizes do not match
|
|
|
|
if size != reposize {
|
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
return
|
2022-06-13 18:35:37 +00:00
|
|
|
case errChan <- &PackError{ID: id, Err: errors.Errorf("unexpected file size: got %d, expected %d", reposize, size)}:
|
2020-11-10 07:16:47 +00:00
|
|
|
}
|
2018-01-12 02:00:48 +00:00
|
|
|
}
|
2015-07-11 22:25:42 +00:00
|
|
|
}
|
|
|
|
|
2020-11-10 07:16:47 +00:00
|
|
|
// orphaned: present in the repo but not in c.packs
|
|
|
|
for orphanID := range repoPacks {
|
2018-01-12 02:00:48 +00:00
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
return
|
2022-06-13 18:35:37 +00:00
|
|
|
case errChan <- &PackError{ID: orphanID, Orphaned: true, Err: errors.New("not referenced in any index")}:
|
2015-07-11 14:00:49 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Error is an error that occurred while checking a repository.
|
|
|
|
type Error struct {
|
2016-08-31 20:39:36 +00:00
|
|
|
TreeID restic.ID
|
2015-07-11 14:00:49 +00:00
|
|
|
Err error
|
|
|
|
}
|
|
|
|
|
2022-10-14 12:13:32 +00:00
|
|
|
func (e *Error) Error() string {
|
2016-02-26 22:06:25 +00:00
|
|
|
if !e.TreeID.IsNull() {
|
2022-04-30 18:27:31 +00:00
|
|
|
return "tree " + e.TreeID.String() + ": " + e.Err.Error()
|
2015-07-11 14:00:49 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return e.Err.Error()
|
|
|
|
}
|
|
|
|
|
2015-10-11 17:13:35 +00:00
|
|
|
// TreeError collects several errors that occurred while processing a tree.
|
2015-07-12 14:42:22 +00:00
|
|
|
type TreeError struct {
|
2016-08-31 20:39:36 +00:00
|
|
|
ID restic.ID
|
2015-07-12 14:42:22 +00:00
|
|
|
Errors []error
|
|
|
|
}
|
|
|
|
|
2022-05-01 18:08:02 +00:00
|
|
|
func (e *TreeError) Error() string {
|
2022-04-30 18:27:31 +00:00
|
|
|
return fmt.Sprintf("tree %v: %v", e.ID, e.Errors)
|
2015-07-12 14:42:22 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// checkTreeWorker checks the trees received and sends out errors to errChan.
|
2020-11-06 23:39:40 +00:00
|
|
|
func (c *Checker) checkTreeWorker(ctx context.Context, trees <-chan restic.TreeItem, out chan<- error) {
|
|
|
|
for job := range trees {
|
|
|
|
debug.Log("check tree %v (tree %v, err %v)", job.ID, job.Tree, job.Error)
|
2020-11-06 23:23:45 +00:00
|
|
|
|
|
|
|
var errs []error
|
2020-11-06 23:39:40 +00:00
|
|
|
if job.Error != nil {
|
|
|
|
errs = append(errs, job.Error)
|
2020-11-06 23:23:45 +00:00
|
|
|
} else {
|
|
|
|
errs = c.checkTree(job.ID, job.Tree)
|
|
|
|
}
|
2015-07-11 14:00:49 +00:00
|
|
|
|
2020-11-06 23:23:45 +00:00
|
|
|
if len(errs) == 0 {
|
|
|
|
continue
|
|
|
|
}
|
2022-05-01 18:08:02 +00:00
|
|
|
treeError := &TreeError{ID: job.ID, Errors: errs}
|
2015-07-12 14:42:22 +00:00
|
|
|
select {
|
2017-06-04 09:16:55 +00:00
|
|
|
case <-ctx.Done():
|
2015-07-12 14:42:22 +00:00
|
|
|
return
|
2020-11-06 23:23:45 +00:00
|
|
|
case out <- treeError:
|
2016-09-27 20:35:08 +00:00
|
|
|
debug.Log("tree %v: sent %d errors", treeError.ID, len(treeError.Errors))
|
2015-07-11 14:00:49 +00:00
|
|
|
}
|
2015-07-12 14:42:22 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-01-19 21:44:50 +00:00
|
|
|
func loadSnapshotTreeIDs(ctx context.Context, lister restic.Lister, repo restic.LoaderUnpacked) (ids restic.IDs, errs []error) {
|
2021-11-07 21:33:44 +00:00
|
|
|
err := restic.ForAllSnapshots(ctx, lister, repo, nil, func(id restic.ID, sn *restic.Snapshot, err error) error {
|
2020-11-28 08:32:06 +00:00
|
|
|
if err != nil {
|
|
|
|
errs = append(errs, err)
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
treeID := *sn.Tree
|
|
|
|
debug.Log("snapshot %v has tree %v", id, treeID)
|
|
|
|
ids = append(ids, treeID)
|
|
|
|
return nil
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
errs = append(errs, err)
|
|
|
|
}
|
|
|
|
|
|
|
|
return ids, errs
|
|
|
|
}
|
|
|
|
|
2015-07-12 14:42:22 +00:00
|
|
|
// Structure checks that for all snapshots all referenced data blobs and
|
|
|
|
// subtrees are available in the index. errChan is closed after all trees have
|
|
|
|
// been traversed.
|
2020-12-05 23:07:45 +00:00
|
|
|
func (c *Checker) Structure(ctx context.Context, p *progress.Counter, errChan chan<- error) {
|
2021-11-07 21:33:44 +00:00
|
|
|
trees, errs := loadSnapshotTreeIDs(ctx, c.snapshots, c.repo)
|
2020-12-05 23:07:45 +00:00
|
|
|
p.SetMax(uint64(len(trees)))
|
2016-09-27 20:35:08 +00:00
|
|
|
debug.Log("need to check %d trees from snapshots, %d errs returned", len(trees), len(errs))
|
2015-07-11 14:00:49 +00:00
|
|
|
|
2015-07-12 14:42:22 +00:00
|
|
|
for _, err := range errs {
|
|
|
|
select {
|
2017-06-04 09:16:55 +00:00
|
|
|
case <-ctx.Done():
|
2015-07-12 14:42:22 +00:00
|
|
|
return
|
|
|
|
case errChan <- err:
|
|
|
|
}
|
2015-07-11 14:00:49 +00:00
|
|
|
}
|
|
|
|
|
2020-11-06 23:23:45 +00:00
|
|
|
wg, ctx := errgroup.WithContext(ctx)
|
2020-11-06 23:39:40 +00:00
|
|
|
treeStream := restic.StreamTrees(ctx, wg, c.repo, trees, func(treeID restic.ID) bool {
|
|
|
|
// blobRefs may be accessed in parallel by checkTree
|
|
|
|
c.blobRefs.Lock()
|
|
|
|
h := restic.BlobHandle{ID: treeID, Type: restic.TreeBlob}
|
|
|
|
blobReferenced := c.blobRefs.M.Has(h)
|
|
|
|
// noop if already referenced
|
|
|
|
c.blobRefs.M.Insert(h)
|
|
|
|
c.blobRefs.Unlock()
|
|
|
|
return blobReferenced
|
2020-12-05 23:07:45 +00:00
|
|
|
}, p)
|
2015-07-11 14:00:49 +00:00
|
|
|
|
2020-11-06 23:23:45 +00:00
|
|
|
defer close(errChan)
|
2021-08-07 22:38:17 +00:00
|
|
|
// The checkTree worker only processes already decoded trees and is thus CPU-bound
|
|
|
|
workerCount := runtime.GOMAXPROCS(0)
|
|
|
|
for i := 0; i < workerCount; i++ {
|
2020-11-06 23:23:45 +00:00
|
|
|
wg.Go(func() error {
|
|
|
|
c.checkTreeWorker(ctx, treeStream, errChan)
|
|
|
|
return nil
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2021-01-30 15:46:34 +00:00
|
|
|
// the wait group should not return an error because no worker returns an
|
|
|
|
// error, so panic if that has changed somehow.
|
|
|
|
err := wg.Wait()
|
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
2015-07-12 14:42:22 +00:00
|
|
|
}
|
|
|
|
|
2016-08-31 20:39:36 +00:00
|
|
|
func (c *Checker) checkTree(id restic.ID, tree *restic.Tree) (errs []error) {
|
2018-01-25 19:49:41 +00:00
|
|
|
debug.Log("checking tree %v", id)
|
2015-07-12 14:42:22 +00:00
|
|
|
|
2015-10-11 18:55:28 +00:00
|
|
|
for _, node := range tree.Nodes {
|
2016-09-01 19:20:03 +00:00
|
|
|
switch node.Type {
|
2015-07-11 14:00:49 +00:00
|
|
|
case "file":
|
2016-04-10 14:51:16 +00:00
|
|
|
if node.Content == nil {
|
2022-10-14 12:13:32 +00:00
|
|
|
errs = append(errs, &Error{TreeID: id, Err: errors.Errorf("file %q has nil blob list", node.Name)})
|
2016-04-10 14:51:16 +00:00
|
|
|
}
|
|
|
|
|
2015-10-11 18:55:28 +00:00
|
|
|
for b, blobID := range node.Content {
|
|
|
|
if blobID.IsNull() {
|
2022-10-14 12:13:32 +00:00
|
|
|
errs = append(errs, &Error{TreeID: id, Err: errors.Errorf("file %q blob %d has null ID", node.Name, b)})
|
2015-10-11 18:55:28 +00:00
|
|
|
continue
|
|
|
|
}
|
2020-12-20 07:48:31 +00:00
|
|
|
// Note that we do not use the blob size. The "obvious" check
|
|
|
|
// whether the sum of the blob sizes matches the file size
|
2023-12-06 12:11:55 +00:00
|
|
|
// unfortunately fails in some cases that are not resolvable
|
2020-12-20 07:48:31 +00:00
|
|
|
// by users, so we omit this check, see #1887
|
|
|
|
|
2024-05-19 12:54:50 +00:00
|
|
|
_, found := c.repo.LookupBlobSize(restic.DataBlob, blobID)
|
2018-03-31 11:22:25 +00:00
|
|
|
if !found {
|
2020-11-06 22:32:31 +00:00
|
|
|
debug.Log("tree %v references blob %v which isn't contained in index", id, blobID)
|
2022-10-14 12:13:32 +00:00
|
|
|
errs = append(errs, &Error{TreeID: id, Err: errors.Errorf("file %q blob %v not found in index", node.Name, blobID)})
|
2018-03-31 11:22:25 +00:00
|
|
|
}
|
|
|
|
}
|
2020-11-06 23:07:32 +00:00
|
|
|
|
|
|
|
if c.trackUnused {
|
|
|
|
// loop a second time to keep the locked section as short as possible
|
|
|
|
c.blobRefs.Lock()
|
|
|
|
for _, blobID := range node.Content {
|
|
|
|
if blobID.IsNull() {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
h := restic.BlobHandle{ID: blobID, Type: restic.DataBlob}
|
|
|
|
c.blobRefs.M.Insert(h)
|
|
|
|
debug.Log("blob %v is referenced", blobID)
|
|
|
|
}
|
|
|
|
c.blobRefs.Unlock()
|
|
|
|
}
|
|
|
|
|
2015-07-11 14:00:49 +00:00
|
|
|
case "dir":
|
|
|
|
if node.Subtree == nil {
|
2022-10-14 12:13:32 +00:00
|
|
|
errs = append(errs, &Error{TreeID: id, Err: errors.Errorf("dir node %q has no subtree", node.Name)})
|
2015-07-11 14:00:49 +00:00
|
|
|
continue
|
|
|
|
}
|
2015-10-11 16:46:26 +00:00
|
|
|
|
|
|
|
if node.Subtree.IsNull() {
|
2022-10-14 12:13:32 +00:00
|
|
|
errs = append(errs, &Error{TreeID: id, Err: errors.Errorf("dir node %q subtree id is null", node.Name)})
|
2015-10-11 16:46:26 +00:00
|
|
|
continue
|
|
|
|
}
|
2016-04-10 14:51:16 +00:00
|
|
|
|
2016-07-20 18:46:57 +00:00
|
|
|
case "symlink", "socket", "chardev", "dev", "fifo":
|
2016-05-08 21:16:01 +00:00
|
|
|
// nothing to check
|
|
|
|
|
2016-04-10 14:51:16 +00:00
|
|
|
default:
|
2022-10-14 12:13:32 +00:00
|
|
|
errs = append(errs, &Error{TreeID: id, Err: errors.Errorf("node %q with invalid type %q", node.Name, node.Type)})
|
2016-04-10 14:51:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if node.Name == "" {
|
2022-10-14 12:13:32 +00:00
|
|
|
errs = append(errs, &Error{TreeID: id, Err: errors.New("node with empty name")})
|
2015-07-12 14:42:22 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return errs
|
2015-07-11 14:00:49 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// UnusedBlobs returns all blobs that have never been referenced.
|
2024-04-05 20:20:14 +00:00
|
|
|
func (c *Checker) UnusedBlobs(ctx context.Context) (blobs restic.BlobHandles, err error) {
|
2020-11-06 23:07:32 +00:00
|
|
|
if !c.trackUnused {
|
|
|
|
panic("only works when tracking blob references")
|
|
|
|
}
|
2015-07-12 14:42:22 +00:00
|
|
|
c.blobRefs.Lock()
|
|
|
|
defer c.blobRefs.Unlock()
|
|
|
|
|
2019-07-13 16:34:55 +00:00
|
|
|
debug.Log("checking %d blobs", len(c.blobRefs.M))
|
2020-11-06 22:41:04 +00:00
|
|
|
ctx, cancel := context.WithCancel(ctx)
|
|
|
|
defer cancel()
|
|
|
|
|
2024-05-19 10:41:56 +00:00
|
|
|
err = c.repo.ListBlobs(ctx, func(blob restic.PackedBlob) {
|
2020-11-06 22:41:04 +00:00
|
|
|
h := restic.BlobHandle{ID: blob.ID, Type: blob.Type}
|
|
|
|
if !c.blobRefs.M.Has(h) {
|
|
|
|
debug.Log("blob %v not referenced", h)
|
|
|
|
blobs = append(blobs, h)
|
2015-07-11 14:00:49 +00:00
|
|
|
}
|
2022-08-19 18:04:39 +00:00
|
|
|
})
|
2015-07-11 14:00:49 +00:00
|
|
|
|
2024-04-05 20:20:14 +00:00
|
|
|
return blobs, err
|
2015-06-28 22:22:25 +00:00
|
|
|
}
|
2015-07-12 15:09:48 +00:00
|
|
|
|
2015-12-06 16:29:31 +00:00
|
|
|
// CountPacks returns the number of packs in the repository.
|
|
|
|
func (c *Checker) CountPacks() uint64 {
|
|
|
|
return uint64(len(c.packs))
|
|
|
|
}
|
|
|
|
|
2018-01-02 05:38:14 +00:00
|
|
|
// GetPacks returns IDSet of packs in the repository
|
2020-11-10 07:16:47 +00:00
|
|
|
func (c *Checker) GetPacks() map[restic.ID]int64 {
|
2018-01-02 05:38:14 +00:00
|
|
|
return c.packs
|
|
|
|
}
|
|
|
|
|
2015-12-02 21:40:36 +00:00
|
|
|
// ReadData loads all data from the repository and checks the integrity.
|
2020-11-08 20:03:59 +00:00
|
|
|
func (c *Checker) ReadData(ctx context.Context, errChan chan<- error) {
|
|
|
|
c.ReadPacks(ctx, c.packs, nil, errChan)
|
2018-01-02 05:38:14 +00:00
|
|
|
}
|
|
|
|
|
2024-04-22 18:53:31 +00:00
|
|
|
// maxStreamBufferSize is the size in bytes (4 MiB) of the buffered reader
// each ReadPacks worker allocates.
const maxStreamBufferSize = 4 << 20
|
|
|
|
|
2018-01-02 05:38:14 +00:00
|
|
|
// ReadPacks loads data from specified packs and checks the integrity.
|
2020-11-10 07:16:47 +00:00
|
|
|
func (c *Checker) ReadPacks(ctx context.Context, packs map[restic.ID]int64, p *progress.Counter, errChan chan<- error) {
|
2015-12-02 21:40:36 +00:00
|
|
|
defer close(errChan)
|
|
|
|
|
2018-01-21 16:25:36 +00:00
|
|
|
g, ctx := errgroup.WithContext(ctx)
|
2021-08-20 14:15:40 +00:00
|
|
|
type checkTask struct {
|
|
|
|
id restic.ID
|
|
|
|
size int64
|
|
|
|
blobs []restic.Blob
|
2020-11-10 07:16:47 +00:00
|
|
|
}
|
2021-08-20 14:15:40 +00:00
|
|
|
ch := make(chan checkTask)
|
2015-12-02 21:40:36 +00:00
|
|
|
|
2021-08-07 22:38:17 +00:00
|
|
|
// as packs are streamed the concurrency is limited by IO
|
|
|
|
workerCount := int(c.repo.Connections())
|
2018-01-21 16:25:36 +00:00
|
|
|
// run workers
|
2021-08-07 22:38:17 +00:00
|
|
|
for i := 0; i < workerCount; i++ {
|
2018-01-21 16:25:36 +00:00
|
|
|
g.Go(func() error {
|
2024-04-22 18:53:31 +00:00
|
|
|
bufRd := bufio.NewReaderSize(nil, maxStreamBufferSize)
|
2023-12-31 09:58:26 +00:00
|
|
|
dec, err := zstd.NewReader(nil)
|
|
|
|
if err != nil {
|
|
|
|
panic(dec)
|
|
|
|
}
|
|
|
|
defer dec.Close()
|
2018-01-21 16:25:36 +00:00
|
|
|
for {
|
2021-08-20 14:15:40 +00:00
|
|
|
var ps checkTask
|
2018-01-21 16:25:36 +00:00
|
|
|
var ok bool
|
|
|
|
|
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
return nil
|
2020-11-10 07:16:47 +00:00
|
|
|
case ps, ok = <-ch:
|
2018-01-21 16:25:36 +00:00
|
|
|
if !ok {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
}
|
2021-08-20 14:15:40 +00:00
|
|
|
|
2024-05-10 14:28:23 +00:00
|
|
|
err := repository.CheckPack(ctx, c.repo.(*repository.Repository), ps.id, ps.blobs, ps.size, bufRd, dec)
|
2020-11-04 13:11:29 +00:00
|
|
|
p.Add(1)
|
2018-01-21 16:25:36 +00:00
|
|
|
if err == nil {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
return nil
|
|
|
|
case errChan <- err:
|
|
|
|
}
|
2015-12-06 16:09:06 +00:00
|
|
|
}
|
2018-01-21 16:25:36 +00:00
|
|
|
})
|
2015-12-02 21:40:36 +00:00
|
|
|
}
|
2015-12-06 16:09:06 +00:00
|
|
|
|
2021-08-20 14:15:40 +00:00
|
|
|
packSet := restic.NewIDSet()
|
|
|
|
for pack := range packs {
|
|
|
|
packSet.Insert(pack)
|
|
|
|
}
|
|
|
|
|
2018-01-02 05:38:14 +00:00
|
|
|
// push packs to ch
|
2024-05-19 10:41:56 +00:00
|
|
|
for pbs := range c.repo.ListPacksFromIndex(ctx, packSet) {
|
2021-08-20 14:15:40 +00:00
|
|
|
size := packs[pbs.PackID]
|
|
|
|
debug.Log("listed %v", pbs.PackID)
|
2018-01-02 05:38:14 +00:00
|
|
|
select {
|
2021-08-20 14:15:40 +00:00
|
|
|
case ch <- checkTask{id: pbs.PackID, size: size, blobs: pbs.Blobs}:
|
2018-01-02 05:38:14 +00:00
|
|
|
case <-ctx.Done():
|
|
|
|
}
|
|
|
|
}
|
|
|
|
close(ch)
|
|
|
|
|
2018-01-21 16:25:36 +00:00
|
|
|
err := g.Wait()
|
|
|
|
if err != nil {
|
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
return
|
|
|
|
case errChan <- err:
|
|
|
|
}
|
2015-12-06 16:09:06 +00:00
|
|
|
}
|
2015-12-02 21:40:36 +00:00
|
|
|
}
|