1
0
Fork 0
mirror of https://github.com/restic/restic.git synced 2024-12-25 17:27:25 +00:00

prune: Use new Index

This commit is contained in:
Alexander Neumann 2016-08-15 20:13:56 +02:00
parent c0ef1ec6fd
commit 009c803c8a

View file

@ -6,10 +6,9 @@ import (
"restic" "restic"
"restic/backend" "restic/backend"
"restic/debug" "restic/debug"
"restic/list" "restic/index"
"restic/pack" "restic/pack"
"restic/repository" "restic/repository"
"restic/worker"
"time" "time"
"golang.org/x/crypto/ssh/terminal" "golang.org/x/crypto/ssh/terminal"
@ -84,61 +83,49 @@ func (cmd CmdPrune) Execute(args []string) error {
done := make(chan struct{}) done := make(chan struct{})
defer close(done) defer close(done)
cmd.global.Verbosef("loading list of files from the repo\n") cmd.global.Verbosef("building new index for repo\n")
idx, err := index.New(repo)
if err != nil {
return err
}
var stats struct { var stats struct {
blobs int blobs int
packs int packs int
snapshots int snapshots int
bytes int64
} }
packs := make(map[backend.ID]pack.BlobSet) for _, pack := range idx.Packs {
for packID := range repo.List(backend.Data, done) { stats.bytes += pack.Size
debug.Log("CmdPrune.Execute", "found %v", packID.Str())
packs[packID] = pack.NewBlobSet()
stats.packs++
} }
cmd.global.Verbosef("repository contains %v packs (%v blobs) with %v bytes\n",
len(idx.Packs), len(idx.Blobs), formatBytes(uint64(stats.bytes)))
cmd.global.Verbosef("listing %v files\n", stats.packs) blobCount := make(map[pack.Handle]int)
blobCount := make(map[backend.ID]int)
duplicateBlobs := 0 duplicateBlobs := 0
duplicateBytes := 0 duplicateBytes := 0
rewritePacks := backend.NewIDSet()
ch := make(chan worker.Job) // find duplicate blobs
go list.AllPacks(repo, ch, done) for _, p := range idx.Packs {
for _, entry := range p.Entries {
bar := newProgressMax(cmd.global.ShowProgress(), uint64(len(packs)), "files")
bar.Start()
for job := range ch {
packID := job.Data.(backend.ID)
if job.Error != nil {
cmd.global.Warnf("unable to list pack %v: %v\n", packID.Str(), job.Error)
continue
}
j := job.Result.(list.Result)
debug.Log("CmdPrune.Execute", "pack %v contains %d blobs", packID.Str(), len(j.Entries()))
for _, pb := range j.Entries() {
packs[packID].Insert(pack.Handle{ID: pb.ID, Type: pb.Type})
stats.blobs++ stats.blobs++
blobCount[pb.ID]++ h := pack.Handle{ID: entry.ID, Type: entry.Type}
blobCount[h]++
if blobCount[pb.ID] > 1 { if blobCount[h] > 1 {
duplicateBlobs++ duplicateBlobs++
duplicateBytes += int(pb.Length) duplicateBytes += int(entry.Length)
} }
} }
bar.Report(restic.Stat{Blobs: 1})
} }
bar.Done()
cmd.global.Verbosef("processed %d blobs: %d duplicate blobs, %d duplicate bytes\n", cmd.global.Verbosef("processed %d blobs: %d duplicate blobs, %d duplicate bytes\n",
stats.blobs, duplicateBlobs, duplicateBytes) stats.blobs, duplicateBlobs, duplicateBytes)
cmd.global.Verbosef("load all snapshots\n") cmd.global.Verbosef("load all snapshots\n")
// find referenced blobs
snapshots, err := restic.LoadAllSnapshots(repo) snapshots, err := restic.LoadAllSnapshots(repo)
if err != nil { if err != nil {
return err return err
@ -151,7 +138,7 @@ func (cmd CmdPrune) Execute(args []string) error {
usedBlobs := pack.NewBlobSet() usedBlobs := pack.NewBlobSet()
seenBlobs := pack.NewBlobSet() seenBlobs := pack.NewBlobSet()
bar = newProgressMax(cmd.global.ShowProgress(), uint64(len(snapshots)), "snapshots") bar := newProgressMax(cmd.global.ShowProgress(), uint64(len(snapshots)), "snapshots")
bar.Start() bar.Start()
for _, sn := range snapshots { for _, sn := range snapshots {
debug.Log("CmdPrune.Execute", "process snapshot %v", sn.ID().Str()) debug.Log("CmdPrune.Execute", "process snapshot %v", sn.ID().Str())
@ -168,15 +155,15 @@ func (cmd CmdPrune) Execute(args []string) error {
cmd.global.Verbosef("found %d of %d data blobs still in use\n", len(usedBlobs), stats.blobs) cmd.global.Verbosef("found %d of %d data blobs still in use\n", len(usedBlobs), stats.blobs)
for packID, blobSet := range packs { // find packs that need a rewrite
for h := range blobSet { rewritePacks := backend.NewIDSet()
if !usedBlobs.Has(h) { for h, blob := range idx.Blobs {
rewritePacks.Insert(packID) if !usedBlobs.Has(h) {
} rewritePacks.Merge(blob.Packs)
}
if blobCount[h.ID] > 1 { if blobCount[h] > 1 {
rewritePacks.Insert(packID) rewritePacks.Merge(blob.Packs)
}
} }
} }