checker: Decode identical tree nodes only once

Even though the checkTreeWorker skips already processed chunks, filterTrees queued the same tree blob on every occurrence. This becomes a serious performance bottleneck for a larger number of snapshots that cover mostly the same directories. Therefore, decode each tree blob exactly once.
2024-12-24 08:44:52 +00:00 · 2019-07-06 18:35:03 +02:00 · 2019-07-06 18:35:03 +02:00 · 70f4c014ef
commit 70f4c014ef
parent f0d8710611
1 changed files with 12 additions and 18 deletions
--- a/internal/checker/checker.go
+++ b/internal/checker/checker.go
@ -25,7 +25,7 @@ type Checker struct {
 	blobs    restic.IDSet
 	blobRefs struct {
 		sync.Mutex
-		M map[restic.ID]uint
+		M map[restic.ID]bool
 	}
 	indexes map[restic.ID]*repository.Index
@ -44,7 +44,7 @@ func New(repo restic.Repository) *Checker {
 		repo:        repo,
 	}
-	c.blobRefs.M = make(map[restic.ID]uint)
+	c.blobRefs.M = make(map[restic.ID]bool)
 	return c
 }
@ -160,7 +160,6 @@ func (c *Checker) LoadIndex(ctx context.Context) (hints []error, errs []error) {
 			for blob := range res.Index.Each(ctx) {
 				c.packs.Insert(blob.PackID)
 				c.blobs.Insert(blob.ID)
 				c.blobRefs.M[blob.ID] = 0
 				cnt++
 				if _, ok := packToIndex[blob.PackID]; !ok {
@ -445,20 +444,10 @@ func (c *Checker) checkTreeWorker(ctx context.Context, in <-chan treeJob, out ch
 				return
 			}
 			id := job.ID
 			alreadyChecked := false
 			c.blobRefs.Lock()
-			if c.blobRefs.M[id] > 0 {
+			c.blobRefs.M[job.ID] = true
 				alreadyChecked = true
 			}
 			c.blobRefs.M[id]++
 			debug.Log("tree %v refcount %d", job.ID, c.blobRefs.M[id])
 			c.blobRefs.Unlock()
 			if alreadyChecked {
 				continue
 			}
 			debug.Log("check tree %v (tree %v, err %v)", job.ID, job.Tree, job.error)
 			var errs []error
@ -497,6 +486,7 @@ func filterTrees(ctx context.Context, backlog restic.IDs, loaderChan chan<- rest
 		job                     treeJob
 		nextTreeID              restic.ID
 		outstandingLoadTreeJobs = 0
 		processedTrees          = restic.NewIDSet()
 	)
 	outCh = nil
@ -504,8 +494,11 @@ func filterTrees(ctx context.Context, backlog restic.IDs, loaderChan chan<- rest
 	for {
 		if loadCh == nil && len(backlog) > 0 {
 			loadCh = loaderChan
 			nextTreeID, backlog = backlog[0], backlog[1:]
 			if processedTrees.Has(nextTreeID) {
 				continue
 			}
 			loadCh = loaderChan
 		}
 		if loadCh == nil && outCh == nil && outstandingLoadTreeJobs == 0 {
@ -520,6 +513,7 @@ func filterTrees(ctx context.Context, backlog restic.IDs, loaderChan chan<- rest
 		case loadCh <- nextTreeID:
 			outstandingLoadTreeJobs++
 			loadCh = nil
 			processedTrees.Insert(nextTreeID)
 		case j, ok := <-inCh:
 			if !ok {
@ -654,8 +648,8 @@ func (c *Checker) checkTree(id restic.ID, tree *restic.Tree) (errs []error) {
 	for _, blobID := range blobs {
 		c.blobRefs.Lock()
-		c.blobRefs.M[blobID]++
+		c.blobRefs.M[blobID] = true
-		debug.Log("blob %v refcount %d", blobID, c.blobRefs.M[blobID])
+		debug.Log("blob %v is referenced", blobID)
 		c.blobRefs.Unlock()
 		if !c.blobs.Has(blobID) {
@ -675,7 +669,7 @@ func (c *Checker) UnusedBlobs() (blobs restic.IDs) {
 	debug.Log("checking %d blobs", len(c.blobs))
 	for id := range c.blobs {
-		if c.blobRefs.M[id] == 0 {
+		if !c.blobRefs.M[id] {
 			debug.Log("blob %v not referenced", id)
 			blobs = append(blobs, id)
 		}