mirror of
https://github.com/restic/restic.git
synced 2024-12-24 08:44:52 +00:00
checker: Decode identical tree nodes only once
Even though the checkTreeWorker skips already processed chunks, filterTrees queued the same tree blob on every occurrence. This becomes a serious performance bottleneck for a larger number of snapshots that cover mostly the same directories. Therefore, decode each tree blob exactly once.
This commit is contained in:
parent
f0d8710611
commit
70f4c014ef
1 changed files with 12 additions and 18 deletions
|
@ -25,7 +25,7 @@ type Checker struct {
|
||||||
blobs restic.IDSet
|
blobs restic.IDSet
|
||||||
blobRefs struct {
|
blobRefs struct {
|
||||||
sync.Mutex
|
sync.Mutex
|
||||||
M map[restic.ID]uint
|
M map[restic.ID]bool
|
||||||
}
|
}
|
||||||
indexes map[restic.ID]*repository.Index
|
indexes map[restic.ID]*repository.Index
|
||||||
|
|
||||||
|
@ -44,7 +44,7 @@ func New(repo restic.Repository) *Checker {
|
||||||
repo: repo,
|
repo: repo,
|
||||||
}
|
}
|
||||||
|
|
||||||
c.blobRefs.M = make(map[restic.ID]uint)
|
c.blobRefs.M = make(map[restic.ID]bool)
|
||||||
|
|
||||||
return c
|
return c
|
||||||
}
|
}
|
||||||
|
@ -160,7 +160,6 @@ func (c *Checker) LoadIndex(ctx context.Context) (hints []error, errs []error) {
|
||||||
for blob := range res.Index.Each(ctx) {
|
for blob := range res.Index.Each(ctx) {
|
||||||
c.packs.Insert(blob.PackID)
|
c.packs.Insert(blob.PackID)
|
||||||
c.blobs.Insert(blob.ID)
|
c.blobs.Insert(blob.ID)
|
||||||
c.blobRefs.M[blob.ID] = 0
|
|
||||||
cnt++
|
cnt++
|
||||||
|
|
||||||
if _, ok := packToIndex[blob.PackID]; !ok {
|
if _, ok := packToIndex[blob.PackID]; !ok {
|
||||||
|
@ -445,20 +444,10 @@ func (c *Checker) checkTreeWorker(ctx context.Context, in <-chan treeJob, out ch
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
id := job.ID
|
|
||||||
alreadyChecked := false
|
|
||||||
c.blobRefs.Lock()
|
c.blobRefs.Lock()
|
||||||
if c.blobRefs.M[id] > 0 {
|
c.blobRefs.M[job.ID] = true
|
||||||
alreadyChecked = true
|
|
||||||
}
|
|
||||||
c.blobRefs.M[id]++
|
|
||||||
debug.Log("tree %v refcount %d", job.ID, c.blobRefs.M[id])
|
|
||||||
c.blobRefs.Unlock()
|
c.blobRefs.Unlock()
|
||||||
|
|
||||||
if alreadyChecked {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
debug.Log("check tree %v (tree %v, err %v)", job.ID, job.Tree, job.error)
|
debug.Log("check tree %v (tree %v, err %v)", job.ID, job.Tree, job.error)
|
||||||
|
|
||||||
var errs []error
|
var errs []error
|
||||||
|
@ -497,6 +486,7 @@ func filterTrees(ctx context.Context, backlog restic.IDs, loaderChan chan<- rest
|
||||||
job treeJob
|
job treeJob
|
||||||
nextTreeID restic.ID
|
nextTreeID restic.ID
|
||||||
outstandingLoadTreeJobs = 0
|
outstandingLoadTreeJobs = 0
|
||||||
|
processedTrees = restic.NewIDSet()
|
||||||
)
|
)
|
||||||
|
|
||||||
outCh = nil
|
outCh = nil
|
||||||
|
@ -504,8 +494,11 @@ func filterTrees(ctx context.Context, backlog restic.IDs, loaderChan chan<- rest
|
||||||
|
|
||||||
for {
|
for {
|
||||||
if loadCh == nil && len(backlog) > 0 {
|
if loadCh == nil && len(backlog) > 0 {
|
||||||
loadCh = loaderChan
|
|
||||||
nextTreeID, backlog = backlog[0], backlog[1:]
|
nextTreeID, backlog = backlog[0], backlog[1:]
|
||||||
|
if processedTrees.Has(nextTreeID) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
loadCh = loaderChan
|
||||||
}
|
}
|
||||||
|
|
||||||
if loadCh == nil && outCh == nil && outstandingLoadTreeJobs == 0 {
|
if loadCh == nil && outCh == nil && outstandingLoadTreeJobs == 0 {
|
||||||
|
@ -520,6 +513,7 @@ func filterTrees(ctx context.Context, backlog restic.IDs, loaderChan chan<- rest
|
||||||
case loadCh <- nextTreeID:
|
case loadCh <- nextTreeID:
|
||||||
outstandingLoadTreeJobs++
|
outstandingLoadTreeJobs++
|
||||||
loadCh = nil
|
loadCh = nil
|
||||||
|
processedTrees.Insert(nextTreeID)
|
||||||
|
|
||||||
case j, ok := <-inCh:
|
case j, ok := <-inCh:
|
||||||
if !ok {
|
if !ok {
|
||||||
|
@ -654,8 +648,8 @@ func (c *Checker) checkTree(id restic.ID, tree *restic.Tree) (errs []error) {
|
||||||
|
|
||||||
for _, blobID := range blobs {
|
for _, blobID := range blobs {
|
||||||
c.blobRefs.Lock()
|
c.blobRefs.Lock()
|
||||||
c.blobRefs.M[blobID]++
|
c.blobRefs.M[blobID] = true
|
||||||
debug.Log("blob %v refcount %d", blobID, c.blobRefs.M[blobID])
|
debug.Log("blob %v is referenced", blobID)
|
||||||
c.blobRefs.Unlock()
|
c.blobRefs.Unlock()
|
||||||
|
|
||||||
if !c.blobs.Has(blobID) {
|
if !c.blobs.Has(blobID) {
|
||||||
|
@ -675,7 +669,7 @@ func (c *Checker) UnusedBlobs() (blobs restic.IDs) {
|
||||||
|
|
||||||
debug.Log("checking %d blobs", len(c.blobs))
|
debug.Log("checking %d blobs", len(c.blobs))
|
||||||
for id := range c.blobs {
|
for id := range c.blobs {
|
||||||
if c.blobRefs.M[id] == 0 {
|
if !c.blobRefs.M[id] {
|
||||||
debug.Log("blob %v not referenced", id)
|
debug.Log("blob %v not referenced", id)
|
||||||
blobs = append(blobs, id)
|
blobs = append(blobs, id)
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue