Refactor rebuild-index code

This code reads all pack headers from all packs and rebuilds the index
from scratch. Afterwards, all old index files are removed. This is needed
because in #434 the command `optimize` produced a broken index that
did not contain a blob any more. Running `rebuild-index` should fix
this.
This commit is contained in:
Alexander Neumann 2016-02-23 23:48:55 +01:00
parent bc911f4609
commit 4cefd456bb
2 changed files with 68 additions and 131 deletions

View File

@ -101,7 +101,7 @@ func printTrees(repo *repository.Repository, wr io.Writer) error {
return nil return nil
} }
const numWorkers = 10 const dumpPackWorkers = 10
// Pack is the struct used in printPacks. // Pack is the struct used in printPacks.
type Pack struct { type Pack struct {
@ -138,7 +138,7 @@ func printPacks(repo *repository.Repository, wr io.Writer) error {
jobCh := make(chan worker.Job) jobCh := make(chan worker.Job)
resCh := make(chan worker.Job) resCh := make(chan worker.Job)
wp := worker.New(numWorkers, f, jobCh, resCh) wp := worker.New(dumpPackWorkers, f, jobCh, resCh)
go func() { go func() {
for name := range repo.Backend().List(backend.Data, done) { for name := range repo.Backend().List(backend.Data, done) {

View File

@ -1,13 +1,13 @@
package main package main
import ( import (
"bytes"
"fmt" "fmt"
"os"
"restic/backend" "restic/backend"
"restic/debug" "restic/debug"
"restic/pack" "restic/pack"
"restic/repository" "restic/repository"
"restic/worker"
) )
type CmdRebuildIndex struct { type CmdRebuildIndex struct {
@ -26,164 +26,101 @@ func init() {
} }
} }
// rebuildIndexWorkers is the number of concurrent workers used to read
// pack headers from the backend while rebuilding the index.
const rebuildIndexWorkers = 10
func (cmd CmdRebuildIndex) RebuildIndex() error {
debug.Log("RebuildIndex.RebuildIndex", "start")
func loadBlobsFromPacks(repo *repository.Repository) (packs map[backend.ID][]pack.Blob) {
done := make(chan struct{}) done := make(chan struct{})
defer close(done) defer close(done)
indexIDs := backend.NewIDSet() f := func(job worker.Job, done <-chan struct{}) (interface{}, error) {
for id := range cmd.repo.List(backend.Index, done) { id := job.Data.(backend.ID)
indexIDs.Insert(id)
}
cmd.global.Printf("rebuilding index from %d indexes\n", len(indexIDs)) h := backend.Handle{Type: backend.Data, Name: id.String()}
rd := backend.NewReadSeeker(repo.Backend(), h)
debug.Log("RebuildIndex.RebuildIndex", "found %v indexes", len(indexIDs)) unpacker, err := pack.NewUnpacker(repo.Key(), rd)
combinedIndex := repository.NewIndex()
packsDone := backend.NewIDSet()
type Blob struct {
id backend.ID
tpe pack.BlobType
}
blobsDone := make(map[Blob]struct{})
i := 0
for indexID := range indexIDs {
cmd.global.Printf(" loading index %v\n", i)
debug.Log("RebuildIndex.RebuildIndex", "load index %v", indexID.Str())
idx, err := repository.LoadIndex(cmd.repo, indexID)
if err != nil { if err != nil {
return err return nil, err
} }
debug.Log("RebuildIndex.RebuildIndex", "adding blobs from index %v", indexID.Str()) return unpacker.Entries, nil
for packedBlob := range idx.Each(done) {
packsDone.Insert(packedBlob.PackID)
b := Blob{
id: packedBlob.ID,
tpe: packedBlob.Type,
}
if _, ok := blobsDone[b]; ok {
continue
}
blobsDone[b] = struct{}{}
combinedIndex.Store(packedBlob)
}
combinedIndex.AddToSupersedes(indexID)
if repository.IndexFull(combinedIndex) {
combinedIndex, err = cmd.storeIndex(combinedIndex)
if err != nil {
return err
}
}
i++
} }
var err error jobCh := make(chan worker.Job)
if combinedIndex.Length() > 0 { resCh := make(chan worker.Job)
combinedIndex, err = cmd.storeIndex(combinedIndex) wp := worker.New(rebuildIndexWorkers, f, jobCh, resCh)
if err != nil {
return err go func() {
for id := range repo.List(backend.Data, done) {
jobCh <- worker.Job{Data: id}
} }
} close(jobCh)
}()
cmd.global.Printf("removing %d old indexes\n", len(indexIDs)) packs = make(map[backend.ID][]pack.Blob)
for id := range indexIDs { for job := range resCh {
debug.Log("RebuildIndex.RebuildIndex", "remove index %v", id.Str()) id := job.Data.(backend.ID)
err := cmd.repo.Backend().Remove(backend.Index, id.String()) if job.Error != nil {
if err != nil { fmt.Fprintf(os.Stderr, "error for pack %v: %v\n", id, job.Error)
debug.Log("RebuildIndex.RebuildIndex", "error removing index %v: %v", id.Str(), err)
return err
}
}
cmd.global.Printf("checking for additional packs\n")
newPacks := 0
var buf []byte
for packID := range cmd.repo.List(backend.Data, done) {
if packsDone.Has(packID) {
continue continue
} }
debug.Log("RebuildIndex.RebuildIndex", "pack %v not indexed", packID.Str()) entries := job.Result.([]pack.Blob)
newPacks++ packs[id] = entries
}
var err error wp.Wait()
h := backend.Handle{Type: backend.Data, Name: packID.String()} return packs
buf, err = backend.LoadAll(cmd.repo.Backend(), h, buf) }
if err != nil {
debug.Log("RebuildIndex.RebuildIndex", "error while loading pack %v", packID.Str())
return fmt.Errorf("error while loading pack %v: %v", packID.Str(), err)
}
hash := backend.Hash(buf) func listIndexIDs(repo *repository.Repository) (list backend.IDs) {
if !hash.Equal(packID) { done := make(chan struct{})
debug.Log("RebuildIndex.RebuildIndex", "Pack ID does not match, want %v, got %v", packID.Str(), hash.Str()) for id := range repo.List(backend.Index, done) {
return fmt.Errorf("Pack ID does not match, want %v, got %v", packID.Str(), hash.Str()) list = append(list, id)
} }
up, err := pack.NewUnpacker(cmd.repo.Key(), bytes.NewReader(buf)) return list
if err != nil { }
debug.Log("RebuildIndex.RebuildIndex", "error while unpacking pack %v", packID.Str())
return err
}
for _, blob := range up.Entries { func (cmd CmdRebuildIndex) RebuildIndex() error {
debug.Log("RebuildIndex.RebuildIndex", "pack %v: blob %v", packID.Str(), blob) debug.Log("RebuildIndex.RebuildIndex", "start rebuilding index")
combinedIndex.Store(repository.PackedBlob{
Type: blob.Type, packs := loadBlobsFromPacks(cmd.repo)
ID: blob.ID, cmd.global.Verbosef("loaded blobs from %d packs\n", len(packs))
idx := repository.NewIndex()
for packID, entries := range packs {
for _, entry := range entries {
pb := repository.PackedBlob{
ID: entry.ID,
Type: entry.Type,
Length: entry.Length,
Offset: entry.Offset,
PackID: packID, PackID: packID,
Offset: blob.Offset,
Length: blob.Length,
})
}
if repository.IndexFull(combinedIndex) {
combinedIndex, err = cmd.storeIndex(combinedIndex)
if err != nil {
return err
} }
idx.Store(pb)
} }
} }
if combinedIndex.Length() > 0 { oldIndexes := listIndexIDs(cmd.repo)
combinedIndex, err = cmd.storeIndex(combinedIndex) idx.AddToSupersedes(oldIndexes...)
cmd.global.Printf(" saving new index\n")
id, err := repository.SaveIndex(cmd.repo, idx)
if err != nil {
debug.Log("RebuildIndex.RebuildIndex", "error saving index: %v", err)
return err
}
debug.Log("RebuildIndex.RebuildIndex", "new index saved as %v", id.Str())
for _, indexID := range oldIndexes {
err := cmd.repo.Backend().Remove(backend.Index, indexID.String())
if err != nil { if err != nil {
return err cmd.global.Warnf("unable to remove index %v: %v\n", indexID.Str(), err)
} }
} }
cmd.global.Printf("added %d packs to the index\n", newPacks)
debug.Log("RebuildIndex.RebuildIndex", "done")
return nil return nil
} }