restic/archiver.go

382 lines
8.4 KiB
Go
Raw Normal View History

2014-12-05 20:45:49 +00:00
package restic
2014-09-23 20:39:12 +00:00
import (
"crypto/sha256"
"encoding/json"
"errors"
"fmt"
"io"
2014-09-23 20:39:12 +00:00
"os"
2014-11-16 21:50:20 +00:00
"sync"
2014-09-23 20:39:12 +00:00
2014-11-23 11:14:56 +00:00
"github.com/juju/arrar"
2014-12-05 20:45:49 +00:00
"github.com/restic/restic/backend"
"github.com/restic/restic/chunker"
2015-01-14 21:08:48 +00:00
"github.com/restic/restic/debug"
2014-09-23 20:39:12 +00:00
)
2014-11-16 21:50:20 +00:00
const (
maxConcurrentFiles = 16
maxConcurrentBlobs = 16
chunkerBufSize = 512 * chunker.KiB
2014-11-16 21:50:20 +00:00
)
2014-09-23 20:39:12 +00:00
type Archiver struct {
s Server
m *Map
2014-09-23 20:39:12 +00:00
2014-11-16 21:50:20 +00:00
fileToken chan struct{}
2014-11-22 21:05:39 +00:00
blobToken chan struct{}
2014-11-16 21:50:20 +00:00
2014-09-23 20:39:12 +00:00
Error func(dir string, fi os.FileInfo, err error) error
Filter func(item string, fi os.FileInfo) bool
p *Progress
2014-09-23 20:39:12 +00:00
}
func NewArchiver(s Server, p *Progress) (*Archiver, error) {
2014-09-23 20:39:12 +00:00
var err error
2014-11-16 21:50:20 +00:00
arch := &Archiver{
2014-12-21 16:02:49 +00:00
s: s,
p: p,
2014-11-16 21:50:20 +00:00
fileToken: make(chan struct{}, maxConcurrentFiles),
2014-11-22 21:05:39 +00:00
blobToken: make(chan struct{}, maxConcurrentBlobs),
2014-11-16 21:50:20 +00:00
}
2014-11-22 21:05:39 +00:00
// fill file and blob token
2014-11-16 21:50:20 +00:00
for i := 0; i < maxConcurrentFiles; i++ {
arch.fileToken <- struct{}{}
}
2014-09-23 20:39:12 +00:00
2014-11-22 21:05:39 +00:00
for i := 0; i < maxConcurrentBlobs; i++ {
arch.blobToken <- struct{}{}
}
// create new map to store all blobs in
arch.m = NewMap()
2014-09-23 20:39:12 +00:00
// abort on all errors
arch.Error = func(string, os.FileInfo, error) error { return err }
// allow all files
arch.Filter = func(string, os.FileInfo) bool { return true }
return arch, nil
}
func (arch *Archiver) Save(t backend.Type, id backend.ID, length uint, rd io.Reader) (Blob, error) {
2015-01-14 21:08:48 +00:00
debug.Log("Archiver.Save", "Save(%v, %v)\n", t, id.Str())
// test if this blob is already known
blob, err := arch.m.FindID(id)
if err == nil {
2015-01-14 21:08:48 +00:00
debug.Log("Archiver.Save", "Save(%v, %v): reusing %v\n", t, id.Str(), blob.Storage.Str())
id.Free()
return blob, nil
}
2014-09-23 20:39:12 +00:00
// else encrypt and save data
blob, err = arch.s.SaveFrom(t, id, length, rd)
// store blob in storage map
smapblob := arch.m.Insert(blob)
// if the map has a different storage id for this plaintext blob, use that
// one and remove the other. This happens if the same plaintext blob was
// stored concurrently and finished earlier than this blob.
if blob.Storage.Compare(smapblob.Storage) != 0 {
2015-01-14 21:08:48 +00:00
debug.Log("Archiver.Save", "using other block, removing %v\n", blob.Storage.Str())
// remove the blob again
// TODO: implement a list of blobs in transport, so this doesn't happen so often
err = arch.s.Remove(t, blob.Storage)
if err != nil {
return Blob{}, err
}
2014-09-23 20:39:12 +00:00
}
2015-01-14 21:08:48 +00:00
debug.Log("Archiver.Save", "Save(%v, %v): new blob %v\n", t, id.Str(), blob)
return smapblob, nil
2014-09-23 20:39:12 +00:00
}
func (arch *Archiver) SaveTreeJSON(item interface{}) (Blob, error) {
// convert to json
data, err := json.Marshal(item)
2014-09-23 20:39:12 +00:00
if err != nil {
2014-11-21 20:21:44 +00:00
return Blob{}, err
2014-09-23 20:39:12 +00:00
}
// check if tree has been saved before
buf := backend.Compress(data)
id := backend.Hash(buf)
blob, err := arch.m.FindID(id)
2014-09-23 20:39:12 +00:00
// return the blob if we found it
if err == nil {
return blob, nil
}
2014-09-23 20:39:12 +00:00
// otherwise save the data
blob, err = arch.s.Save(backend.Tree, buf, id)
2014-09-23 20:39:12 +00:00
if err != nil {
2014-11-21 20:21:44 +00:00
return Blob{}, err
2014-09-23 20:39:12 +00:00
}
// store blob in storage map
arch.m.Insert(blob)
2014-09-23 20:39:12 +00:00
return blob, nil
}
// SaveFile stores the content of the file on the backend as a Blob by calling
// Save for each chunk.
func (arch *Archiver) SaveFile(node *Node) (Blobs, error) {
file, err := os.Open(node.path)
defer file.Close()
2014-09-23 20:39:12 +00:00
if err != nil {
return nil, err
}
// check file again
fi, err := file.Stat()
if err != nil {
return nil, err
}
if fi.ModTime() != node.ModTime {
e2 := arch.Error(node.path, fi, errors.New("file was updated, using new version"))
if e2 == nil {
// create new node
n, err := NodeFromFileInfo(node.path, fi)
if err != nil {
return nil, err
}
// copy node
*node = *n
}
}
var blobs Blobs
// store all chunks
chnker := chunker.New(file, chunkerBufSize, sha256.New)
chans := [](<-chan Blob){}
//defer chnker.Free()
chunks := 0
for {
buf := GetChunkBuf("blob chunker")
chunk, err := chnker.Next()
if err == io.EOF {
FreeChunkBuf("blob chunker", buf)
break
}
if err != nil {
FreeChunkBuf("blob chunker", buf)
return nil, arrar.Annotate(err, "SaveFile() chunker.Next()")
}
chunks++
// acquire token, start goroutine to save chunk
token := <-arch.blobToken
resCh := make(chan Blob, 1)
2014-11-22 21:05:39 +00:00
go func(ch chan<- Blob) {
blob, err := arch.Save(backend.Data, chunk.Digest, chunk.Length, chunk.Reader(file))
// TODO handle error
if err != nil {
panic(err)
}
FreeChunkBuf("blob chunker", buf)
arch.p.Report(Stat{Bytes: blob.Size})
arch.blobToken <- token
ch <- blob
}(resCh)
2014-11-22 21:05:39 +00:00
chans = append(chans, resCh)
}
blobs = []Blob{}
for _, ch := range chans {
blobs = append(blobs, <-ch)
}
if len(blobs) != chunks {
return nil, fmt.Errorf("chunker returned %v chunks, but only %v blobs saved", chunks, len(blobs))
2014-09-23 20:39:12 +00:00
}
var bytes uint64
2014-09-23 20:39:12 +00:00
node.Content = make([]backend.ID, len(blobs))
debug.Log("Archiver.Save", "checking size for file %s", node.path)
2014-09-23 20:39:12 +00:00
for i, blob := range blobs {
node.Content[i] = blob.ID
bytes += blob.Size
debug.Log("Archiver.Save", " adding blob %s", blob)
}
if bytes != node.Size {
return nil, fmt.Errorf("errors saving node %q: saved %d bytes, wanted %d bytes", node.path, bytes, node.Size)
2014-09-23 20:39:12 +00:00
}
2015-01-14 21:08:48 +00:00
debug.Log("Archiver.SaveFile", "SaveFile(%q): %v\n", node.path, blobs)
return blobs, nil
2014-09-23 20:39:12 +00:00
}
2014-11-21 20:21:44 +00:00
func (arch *Archiver) saveTree(t *Tree) (Blob, error) {
2015-01-14 21:08:48 +00:00
debug.Log("Archiver.saveTree", "saveTree(%v)\n", t)
2014-11-16 21:50:20 +00:00
var wg sync.WaitGroup
// add all blobs to global map
arch.m.Merge(t.Map)
// TODO: do all this in parallel
for _, node := range t.Nodes {
if node.tree != nil {
b, err := arch.saveTree(node.tree)
if err != nil {
2014-11-21 20:21:44 +00:00
return Blob{}, err
}
node.Subtree = b.ID
t.Map.Insert(b)
arch.p.Report(Stat{Dirs: 1})
} else if node.Type == "file" {
if len(node.Content) > 0 {
removeContent := false
// check content
for _, id := range node.Content {
blob, err := t.Map.FindID(id)
if err != nil {
2015-01-14 21:08:48 +00:00
debug.Log("Archiver.saveTree", "unable to find storage id for data blob %v", id.Str())
arch.Error(node.path, nil, fmt.Errorf("unable to find storage id for data blob %v", id.Str()))
removeContent = true
t.Map.DeleteID(id)
arch.m.DeleteID(id)
continue
}
if ok, err := arch.s.Test(backend.Data, blob.Storage); !ok || err != nil {
2015-01-14 21:08:48 +00:00
debug.Log("Archiver.saveTree", "blob %v not in repository (error is %v)", blob, err)
arch.Error(node.path, nil, fmt.Errorf("blob %v not in repository (error is %v)", blob.Storage.Str(), err))
removeContent = true
t.Map.DeleteID(id)
arch.m.DeleteID(id)
}
}
if removeContent {
2015-01-14 21:08:48 +00:00
debug.Log("Archiver.saveTree", "removing content for %s", node.path)
node.Content = node.Content[:0]
}
}
if len(node.Content) == 0 {
// get token
token := <-arch.fileToken
// start goroutine
wg.Add(1)
go func(n *Node) {
defer wg.Done()
defer func() {
arch.fileToken <- token
}()
var blobs Blobs
2015-01-14 16:48:23 +00:00
blobs, n.err = arch.SaveFile(n)
for _, b := range blobs {
t.Map.Insert(b)
}
arch.p.Report(Stat{Files: 1})
}(node)
}
2014-09-23 20:39:12 +00:00
}
}
2014-11-16 21:50:20 +00:00
wg.Wait()
usedIDs := backend.NewIDSet()
2014-11-30 23:06:29 +00:00
// check for invalid file nodes
for _, node := range t.Nodes {
if node.Type == "file" && node.Content == nil && node.err == nil {
2014-11-30 23:06:29 +00:00
return Blob{}, fmt.Errorf("node %v has empty content", node.Name)
}
// remember used hashes
if node.Type == "file" && node.Content != nil {
for _, id := range node.Content {
usedIDs.Insert(id)
}
}
if node.Type == "dir" && node.Subtree != nil {
usedIDs.Insert(node.Subtree)
}
if node.err != nil {
err := arch.Error(node.path, nil, node.err)
if err != nil {
return Blob{}, err
}
// save error message in node
node.Error = node.err.Error()
}
2014-11-30 23:06:29 +00:00
}
before := len(t.Map.IDs())
t.Map.Prune(usedIDs)
after := len(t.Map.IDs())
if before != after {
2015-01-14 21:08:48 +00:00
debug.Log("Archiver.saveTree", "pruned %d ids from map for tree %v\n", before-after, t)
}
blob, err := arch.SaveTreeJSON(t)
if err != nil {
2014-11-21 20:21:44 +00:00
return Blob{}, err
}
return blob, nil
}
2014-11-30 21:34:21 +00:00
func (arch *Archiver) Snapshot(dir string, t *Tree, parentSnapshot backend.ID) (*Snapshot, backend.ID, error) {
2015-01-14 21:08:48 +00:00
debug.Break("Archiver.Snapshot")
arch.p.Start()
defer arch.p.Done()
sn, err := NewSnapshot(dir)
if err != nil {
return nil, nil, err
}
2014-11-30 21:34:21 +00:00
sn.Parent = parentSnapshot
blob, err := arch.saveTree(t)
2014-09-23 20:39:12 +00:00
if err != nil {
return nil, nil, err
}
sn.Tree = blob
2014-11-23 21:26:01 +00:00
2014-09-23 20:39:12 +00:00
// save snapshot
blob, err = arch.s.SaveJSON(backend.Snapshot, sn)
2014-09-23 20:39:12 +00:00
if err != nil {
return nil, nil, err
}
return sn, blob.Storage, nil
2014-09-23 20:39:12 +00:00
}