1
0
Fork 0
mirror of https://github.com/restic/restic.git synced 2024-12-23 16:26:11 +00:00

checker: Decode identical tree nodes only once

Even though the checkTreeWorker skips already processed chunks,
filterTrees did queue the same tree blob on every occurrence. This
becomes a serious performance bottleneck for a larger number of snapshots
that cover mostly the same directories. Therefore, decode a tree blob
exactly once.
This commit is contained in:
Michael Eischer 2019-07-06 18:35:03 +02:00
parent f0d8710611
commit 70f4c014ef

View file

@ -25,7 +25,7 @@ type Checker struct {
blobs restic.IDSet blobs restic.IDSet
blobRefs struct { blobRefs struct {
sync.Mutex sync.Mutex
M map[restic.ID]uint M map[restic.ID]bool
} }
indexes map[restic.ID]*repository.Index indexes map[restic.ID]*repository.Index
@ -44,7 +44,7 @@ func New(repo restic.Repository) *Checker {
repo: repo, repo: repo,
} }
c.blobRefs.M = make(map[restic.ID]uint) c.blobRefs.M = make(map[restic.ID]bool)
return c return c
} }
@ -160,7 +160,6 @@ func (c *Checker) LoadIndex(ctx context.Context) (hints []error, errs []error) {
for blob := range res.Index.Each(ctx) { for blob := range res.Index.Each(ctx) {
c.packs.Insert(blob.PackID) c.packs.Insert(blob.PackID)
c.blobs.Insert(blob.ID) c.blobs.Insert(blob.ID)
c.blobRefs.M[blob.ID] = 0
cnt++ cnt++
if _, ok := packToIndex[blob.PackID]; !ok { if _, ok := packToIndex[blob.PackID]; !ok {
@ -445,20 +444,10 @@ func (c *Checker) checkTreeWorker(ctx context.Context, in <-chan treeJob, out ch
return return
} }
id := job.ID
alreadyChecked := false
c.blobRefs.Lock() c.blobRefs.Lock()
if c.blobRefs.M[id] > 0 { c.blobRefs.M[job.ID] = true
alreadyChecked = true
}
c.blobRefs.M[id]++
debug.Log("tree %v refcount %d", job.ID, c.blobRefs.M[id])
c.blobRefs.Unlock() c.blobRefs.Unlock()
if alreadyChecked {
continue
}
debug.Log("check tree %v (tree %v, err %v)", job.ID, job.Tree, job.error) debug.Log("check tree %v (tree %v, err %v)", job.ID, job.Tree, job.error)
var errs []error var errs []error
@ -497,6 +486,7 @@ func filterTrees(ctx context.Context, backlog restic.IDs, loaderChan chan<- rest
job treeJob job treeJob
nextTreeID restic.ID nextTreeID restic.ID
outstandingLoadTreeJobs = 0 outstandingLoadTreeJobs = 0
processedTrees = restic.NewIDSet()
) )
outCh = nil outCh = nil
@ -504,8 +494,11 @@ func filterTrees(ctx context.Context, backlog restic.IDs, loaderChan chan<- rest
for { for {
if loadCh == nil && len(backlog) > 0 { if loadCh == nil && len(backlog) > 0 {
loadCh = loaderChan
nextTreeID, backlog = backlog[0], backlog[1:] nextTreeID, backlog = backlog[0], backlog[1:]
if processedTrees.Has(nextTreeID) {
continue
}
loadCh = loaderChan
} }
if loadCh == nil && outCh == nil && outstandingLoadTreeJobs == 0 { if loadCh == nil && outCh == nil && outstandingLoadTreeJobs == 0 {
@ -520,6 +513,7 @@ func filterTrees(ctx context.Context, backlog restic.IDs, loaderChan chan<- rest
case loadCh <- nextTreeID: case loadCh <- nextTreeID:
outstandingLoadTreeJobs++ outstandingLoadTreeJobs++
loadCh = nil loadCh = nil
processedTrees.Insert(nextTreeID)
case j, ok := <-inCh: case j, ok := <-inCh:
if !ok { if !ok {
@ -654,8 +648,8 @@ func (c *Checker) checkTree(id restic.ID, tree *restic.Tree) (errs []error) {
for _, blobID := range blobs { for _, blobID := range blobs {
c.blobRefs.Lock() c.blobRefs.Lock()
c.blobRefs.M[blobID]++ c.blobRefs.M[blobID] = true
debug.Log("blob %v refcount %d", blobID, c.blobRefs.M[blobID]) debug.Log("blob %v is referenced", blobID)
c.blobRefs.Unlock() c.blobRefs.Unlock()
if !c.blobs.Has(blobID) { if !c.blobs.Has(blobID) {
@ -675,7 +669,7 @@ func (c *Checker) UnusedBlobs() (blobs restic.IDs) {
debug.Log("checking %d blobs", len(c.blobs)) debug.Log("checking %d blobs", len(c.blobs))
for id := range c.blobs { for id := range c.blobs {
if c.blobRefs.M[id] == 0 { if !c.blobRefs.M[id] {
debug.Log("blob %v not referenced", id) debug.Log("blob %v not referenced", id)
blobs = append(blobs, id) blobs = append(blobs, id)
} }