mirror of
https://github.com/restic/restic.git
synced 2024-12-23 16:26:11 +00:00
checker: Unify blobs, processed trees and referenced blobs map
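The blobRefs map and the processedTrees IDSet are merged to reduce memory usage. Instead of a single boolean per blob, the blobRefs map now stores a set of flags: one flag (blobReferenced) records that a blob is referenced, and a separate flag (treeProcessed) records that a blob has already been processed as a tree. Keeping these two flags separate prevents skipping of trees whose content is identical to an already processed data blob. A third flag (blobExists) tracks whether a blob exists in the index or not, which removes the need for the blobs IDSet.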
This commit is contained in:
parent
35d8413639
commit
36c69e3ca7
1 changed file with 32 additions and 23 deletions
|
@ -22,10 +22,10 @@ import (
|
||||||
// repository (e.g. missing blobs), and needs a valid Repository to work on.
|
// repository (e.g. missing blobs), and needs a valid Repository to work on.
|
||||||
type Checker struct {
|
type Checker struct {
|
||||||
packs restic.IDSet
|
packs restic.IDSet
|
||||||
blobs restic.IDSet
|
|
||||||
blobRefs struct {
|
blobRefs struct {
|
||||||
sync.Mutex
|
sync.Mutex
|
||||||
M map[restic.ID]bool
|
// see flags below
|
||||||
|
M map[restic.ID]blobStatus
|
||||||
}
|
}
|
||||||
|
|
||||||
masterIndex *repository.MasterIndex
|
masterIndex *repository.MasterIndex
|
||||||
|
@ -33,16 +33,23 @@ type Checker struct {
|
||||||
repo restic.Repository
|
repo restic.Repository
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type blobStatus uint8
|
||||||
|
|
||||||
|
const (
|
||||||
|
blobExists blobStatus = 1 << iota
|
||||||
|
blobReferenced
|
||||||
|
treeProcessed
|
||||||
|
)
|
||||||
|
|
||||||
// New returns a new checker which runs on repo.
|
// New returns a new checker which runs on repo.
|
||||||
func New(repo restic.Repository) *Checker {
|
func New(repo restic.Repository) *Checker {
|
||||||
c := &Checker{
|
c := &Checker{
|
||||||
packs: restic.NewIDSet(),
|
packs: restic.NewIDSet(),
|
||||||
blobs: restic.NewIDSet(),
|
|
||||||
masterIndex: repository.NewMasterIndex(),
|
masterIndex: repository.NewMasterIndex(),
|
||||||
repo: repo,
|
repo: repo,
|
||||||
}
|
}
|
||||||
|
|
||||||
c.blobRefs.M = make(map[restic.ID]bool)
|
c.blobRefs.M = make(map[restic.ID]blobStatus)
|
||||||
|
|
||||||
return c
|
return c
|
||||||
}
|
}
|
||||||
|
@ -156,7 +163,7 @@ func (c *Checker) LoadIndex(ctx context.Context) (hints []error, errs []error) {
|
||||||
cnt := 0
|
cnt := 0
|
||||||
for blob := range res.Index.Each(ctx) {
|
for blob := range res.Index.Each(ctx) {
|
||||||
c.packs.Insert(blob.PackID)
|
c.packs.Insert(blob.PackID)
|
||||||
c.blobs.Insert(blob.ID)
|
c.blobRefs.M[blob.ID] = blobExists
|
||||||
cnt++
|
cnt++
|
||||||
|
|
||||||
if _, ok := packToIndex[blob.PackID]; !ok {
|
if _, ok := packToIndex[blob.PackID]; !ok {
|
||||||
|
@ -441,10 +448,6 @@ func (c *Checker) checkTreeWorker(ctx context.Context, in <-chan treeJob, out ch
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
c.blobRefs.Lock()
|
|
||||||
c.blobRefs.M[job.ID] = true
|
|
||||||
c.blobRefs.Unlock()
|
|
||||||
|
|
||||||
debug.Log("check tree %v (tree %v, err %v)", job.ID, job.Tree, job.error)
|
debug.Log("check tree %v (tree %v, err %v)", job.ID, job.Tree, job.error)
|
||||||
|
|
||||||
var errs []error
|
var errs []error
|
||||||
|
@ -469,7 +472,7 @@ func (c *Checker) checkTreeWorker(ctx context.Context, in <-chan treeJob, out ch
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func filterTrees(ctx context.Context, backlog restic.IDs, loaderChan chan<- restic.ID, in <-chan treeJob, out chan<- treeJob) {
|
func (c *Checker) filterTrees(ctx context.Context, backlog restic.IDs, loaderChan chan<- restic.ID, in <-chan treeJob, out chan<- treeJob) {
|
||||||
defer func() {
|
defer func() {
|
||||||
debug.Log("closing output channels")
|
debug.Log("closing output channels")
|
||||||
close(loaderChan)
|
close(loaderChan)
|
||||||
|
@ -483,7 +486,6 @@ func filterTrees(ctx context.Context, backlog restic.IDs, loaderChan chan<- rest
|
||||||
job treeJob
|
job treeJob
|
||||||
nextTreeID restic.ID
|
nextTreeID restic.ID
|
||||||
outstandingLoadTreeJobs = 0
|
outstandingLoadTreeJobs = 0
|
||||||
processedTrees = restic.NewIDSet()
|
|
||||||
)
|
)
|
||||||
|
|
||||||
outCh = nil
|
outCh = nil
|
||||||
|
@ -492,9 +494,16 @@ func filterTrees(ctx context.Context, backlog restic.IDs, loaderChan chan<- rest
|
||||||
for {
|
for {
|
||||||
if loadCh == nil && len(backlog) > 0 {
|
if loadCh == nil && len(backlog) > 0 {
|
||||||
nextTreeID, backlog = backlog[0], backlog[1:]
|
nextTreeID, backlog = backlog[0], backlog[1:]
|
||||||
if processedTrees.Has(nextTreeID) {
|
|
||||||
|
// use a separate flag for processed trees to ensure that check still processes trees
|
||||||
|
// even when a file references a tree blob
|
||||||
|
c.blobRefs.Lock()
|
||||||
|
blobFlags := c.blobRefs.M[nextTreeID]
|
||||||
|
c.blobRefs.Unlock()
|
||||||
|
if (blobFlags & treeProcessed) != 0 {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
loadCh = loaderChan
|
loadCh = loaderChan
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -510,7 +519,9 @@ func filterTrees(ctx context.Context, backlog restic.IDs, loaderChan chan<- rest
|
||||||
case loadCh <- nextTreeID:
|
case loadCh <- nextTreeID:
|
||||||
outstandingLoadTreeJobs++
|
outstandingLoadTreeJobs++
|
||||||
loadCh = nil
|
loadCh = nil
|
||||||
processedTrees.Insert(nextTreeID)
|
c.blobRefs.Lock()
|
||||||
|
c.blobRefs.M[nextTreeID] |= treeProcessed | blobReferenced
|
||||||
|
c.blobRefs.Unlock()
|
||||||
|
|
||||||
case j, ok := <-inCh:
|
case j, ok := <-inCh:
|
||||||
if !ok {
|
if !ok {
|
||||||
|
@ -591,7 +602,7 @@ func (c *Checker) Structure(ctx context.Context, errChan chan<- error) {
|
||||||
go c.checkTreeWorker(ctx, treeJobChan2, errChan, &wg)
|
go c.checkTreeWorker(ctx, treeJobChan2, errChan, &wg)
|
||||||
}
|
}
|
||||||
|
|
||||||
filterTrees(ctx, trees, treeIDChan, treeJobChan1, treeJobChan2)
|
c.filterTrees(ctx, trees, treeIDChan, treeJobChan1, treeJobChan2)
|
||||||
|
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
}
|
}
|
||||||
|
@ -646,15 +657,13 @@ func (c *Checker) checkTree(id restic.ID, tree *restic.Tree) (errs []error) {
|
||||||
|
|
||||||
for _, blobID := range blobs {
|
for _, blobID := range blobs {
|
||||||
c.blobRefs.Lock()
|
c.blobRefs.Lock()
|
||||||
c.blobRefs.M[blobID] = true
|
if (c.blobRefs.M[blobID] & blobExists) == 0 {
|
||||||
debug.Log("blob %v is referenced", blobID)
|
|
||||||
c.blobRefs.Unlock()
|
|
||||||
|
|
||||||
if !c.blobs.Has(blobID) {
|
|
||||||
debug.Log("tree %v references blob %v which isn't contained in index", id, blobID)
|
debug.Log("tree %v references blob %v which isn't contained in index", id, blobID)
|
||||||
|
|
||||||
errs = append(errs, Error{TreeID: id, BlobID: blobID, Err: errors.New("not found in index")})
|
errs = append(errs, Error{TreeID: id, BlobID: blobID, Err: errors.New("not found in index")})
|
||||||
}
|
}
|
||||||
|
c.blobRefs.M[blobID] |= blobReferenced
|
||||||
|
debug.Log("blob %v is referenced", blobID)
|
||||||
|
c.blobRefs.Unlock()
|
||||||
}
|
}
|
||||||
|
|
||||||
return errs
|
return errs
|
||||||
|
@ -665,9 +674,9 @@ func (c *Checker) UnusedBlobs() (blobs restic.IDs) {
|
||||||
c.blobRefs.Lock()
|
c.blobRefs.Lock()
|
||||||
defer c.blobRefs.Unlock()
|
defer c.blobRefs.Unlock()
|
||||||
|
|
||||||
debug.Log("checking %d blobs", len(c.blobs))
|
debug.Log("checking %d blobs", len(c.blobRefs.M))
|
||||||
for id := range c.blobs {
|
for id, flags := range c.blobRefs.M {
|
||||||
if !c.blobRefs.M[id] {
|
if (flags & blobReferenced) == 0 {
|
||||||
debug.Log("blob %v not referenced", id)
|
debug.Log("blob %v not referenced", id)
|
||||||
blobs = append(blobs, id)
|
blobs = append(blobs, id)
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue