mirror of https://github.com/restic/restic.git
666 lines
14 KiB
Go
666 lines
14 KiB
Go
package restic
|
|
|
|
import (
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"path/filepath"
|
|
"sort"
|
|
"sync"
|
|
|
|
"github.com/juju/arrar"
|
|
"github.com/restic/restic/backend"
|
|
"github.com/restic/restic/chunker"
|
|
"github.com/restic/restic/debug"
|
|
"github.com/restic/restic/pipe"
|
|
)
|
|
|
|
const (
|
|
maxConcurrentBlobs = 32
|
|
maxConcurrency = 10
|
|
maxConcurrencyPreload = 20
|
|
|
|
// chunkerBufSize is used in pool.go
|
|
chunkerBufSize = 512 * chunker.KiB
|
|
)
|
|
|
|
type Archiver struct {
|
|
s Server
|
|
m *Map
|
|
|
|
blobToken chan struct{}
|
|
|
|
Error func(dir string, fi os.FileInfo, err error) error
|
|
Filter func(item string, fi os.FileInfo) bool
|
|
}
|
|
|
|
func NewArchiver(s Server) (*Archiver, error) {
|
|
var err error
|
|
arch := &Archiver{
|
|
s: s,
|
|
blobToken: make(chan struct{}, maxConcurrentBlobs),
|
|
}
|
|
|
|
// fill blob token
|
|
for i := 0; i < maxConcurrentBlobs; i++ {
|
|
arch.blobToken <- struct{}{}
|
|
}
|
|
|
|
// create new map to store all blobs in
|
|
arch.m = NewMap()
|
|
|
|
// abort on all errors
|
|
arch.Error = func(string, os.FileInfo, error) error { return err }
|
|
// allow all files
|
|
arch.Filter = func(string, os.FileInfo) bool { return true }
|
|
|
|
return arch, nil
|
|
}
|
|
|
|
// Preload loads all tree objects from repository and adds all blobs that are
|
|
// still available to the map for deduplication.
|
|
func (arch *Archiver) Preload(p *Progress) error {
|
|
cache, err := NewCache()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
p.Start()
|
|
defer p.Done()
|
|
|
|
debug.Log("Archiver.Preload", "Start loading known blobs")
|
|
|
|
// load all trees, in parallel
|
|
worker := func(wg *sync.WaitGroup, c <-chan backend.ID) {
|
|
for id := range c {
|
|
var tree *Tree
|
|
|
|
// load from cache
|
|
var t Tree
|
|
rd, err := cache.Load(backend.Tree, id)
|
|
if err == nil {
|
|
debug.Log("Archiver.Preload", "tree %v cached", id.Str())
|
|
tree = &t
|
|
dec := json.NewDecoder(rd)
|
|
err = dec.Decode(&t)
|
|
|
|
if err != nil {
|
|
continue
|
|
}
|
|
} else {
|
|
debug.Log("Archiver.Preload", "tree %v not cached: %v", id.Str(), err)
|
|
|
|
tree, err = LoadTree(arch.s, id)
|
|
// ignore error and advance to next tree
|
|
if err != nil {
|
|
continue
|
|
}
|
|
}
|
|
|
|
debug.Log("Archiver.Preload", "load tree %v with %d blobs", id, tree.Map.Len())
|
|
arch.m.Merge(tree.Map)
|
|
p.Report(Stat{Trees: 1, Blobs: uint64(tree.Map.Len())})
|
|
}
|
|
wg.Done()
|
|
}
|
|
|
|
idCh := make(chan backend.ID)
|
|
|
|
// start workers
|
|
var wg sync.WaitGroup
|
|
for i := 0; i < maxConcurrencyPreload; i++ {
|
|
wg.Add(1)
|
|
go worker(&wg, idCh)
|
|
}
|
|
|
|
// list ids
|
|
trees := 0
|
|
err = arch.s.EachID(backend.Tree, func(id backend.ID) {
|
|
trees++
|
|
|
|
if trees%1000 == 0 {
|
|
debug.Log("Archiver.Preload", "Loaded %v trees", trees)
|
|
}
|
|
idCh <- id
|
|
})
|
|
|
|
close(idCh)
|
|
|
|
// wait for workers
|
|
wg.Wait()
|
|
|
|
debug.Log("Archiver.Preload", "Loaded %v blobs from %v trees", arch.m.Len(), trees)
|
|
|
|
return err
|
|
}
|
|
|
|
func (arch *Archiver) Save(t backend.Type, id backend.ID, length uint, rd io.Reader) (Blob, error) {
|
|
debug.Log("Archiver.Save", "Save(%v, %v)\n", t, id.Str())
|
|
|
|
// test if this blob is already known
|
|
blob, err := arch.m.FindID(id)
|
|
if err == nil {
|
|
debug.Log("Archiver.Save", "Save(%v, %v): reusing %v\n", t, id.Str(), blob.Storage.Str())
|
|
id.Free()
|
|
return blob, nil
|
|
}
|
|
|
|
// else encrypt and save data
|
|
blob, err = arch.s.SaveFrom(t, id, length, rd)
|
|
|
|
// store blob in storage map
|
|
smapblob := arch.m.Insert(blob)
|
|
|
|
// if the map has a different storage id for this plaintext blob, use that
|
|
// one and remove the other. This happens if the same plaintext blob was
|
|
// stored concurrently and finished earlier than this blob.
|
|
if blob.Storage.Compare(smapblob.Storage) != 0 {
|
|
debug.Log("Archiver.Save", "using other block, removing %v\n", blob.Storage.Str())
|
|
|
|
// remove the blob again
|
|
// TODO: implement a list of blobs in transport, so this doesn't happen so often
|
|
err = arch.s.Remove(t, blob.Storage)
|
|
if err != nil {
|
|
return Blob{}, err
|
|
}
|
|
}
|
|
|
|
debug.Log("Archiver.Save", "Save(%v, %v): new blob %v\n", t, id.Str(), blob)
|
|
|
|
return smapblob, nil
|
|
}
|
|
|
|
func (arch *Archiver) SaveTreeJSON(item interface{}) (Blob, error) {
|
|
// convert to json
|
|
data, err := json.Marshal(item)
|
|
// append newline
|
|
data = append(data, '\n')
|
|
if err != nil {
|
|
return Blob{}, err
|
|
}
|
|
|
|
// check if tree has been saved before
|
|
id := backend.Hash(data)
|
|
blob, err := arch.m.FindID(id)
|
|
|
|
// return the blob if we found it
|
|
if err == nil {
|
|
return blob, nil
|
|
}
|
|
|
|
// otherwise save the data
|
|
blob, err = arch.s.SaveJSON(backend.Tree, item)
|
|
if err != nil {
|
|
return Blob{}, err
|
|
}
|
|
|
|
// store blob in storage map
|
|
arch.m.Insert(blob)
|
|
|
|
return blob, nil
|
|
}
|
|
|
|
// SaveFile stores the content of the file on the backend as a Blob by calling
|
|
// Save for each chunk.
|
|
func (arch *Archiver) SaveFile(p *Progress, node *Node) (Blobs, error) {
|
|
file, err := os.Open(node.path)
|
|
defer file.Close()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// check file again
|
|
fi, err := file.Stat()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if fi.ModTime() != node.ModTime {
|
|
e2 := arch.Error(node.path, fi, errors.New("file was updated, using new version"))
|
|
|
|
if e2 == nil {
|
|
// create new node
|
|
n, err := NodeFromFileInfo(node.path, fi)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// copy node
|
|
*node = *n
|
|
}
|
|
}
|
|
|
|
var blobs Blobs
|
|
|
|
// store all chunks
|
|
chnker := GetChunker("archiver.SaveFile")
|
|
chnker.Reset(file)
|
|
chans := [](<-chan Blob){}
|
|
defer FreeChunker("archiver.SaveFile", chnker)
|
|
|
|
chunks := 0
|
|
|
|
for {
|
|
chunk, err := chnker.Next()
|
|
if err == io.EOF {
|
|
break
|
|
}
|
|
|
|
if err != nil {
|
|
return nil, arrar.Annotate(err, "SaveFile() chunker.Next()")
|
|
}
|
|
|
|
chunks++
|
|
|
|
// acquire token, start goroutine to save chunk
|
|
token := <-arch.blobToken
|
|
resCh := make(chan Blob, 1)
|
|
|
|
go func(ch chan<- Blob) {
|
|
blob, err := arch.Save(backend.Data, chunk.Digest, chunk.Length, chunk.Reader(file))
|
|
// TODO handle error
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
|
|
p.Report(Stat{Bytes: blob.Size})
|
|
arch.blobToken <- token
|
|
ch <- blob
|
|
}(resCh)
|
|
|
|
chans = append(chans, resCh)
|
|
}
|
|
|
|
blobs = []Blob{}
|
|
for _, ch := range chans {
|
|
blobs = append(blobs, <-ch)
|
|
}
|
|
|
|
if len(blobs) != chunks {
|
|
return nil, fmt.Errorf("chunker returned %v chunks, but only %v blobs saved", chunks, len(blobs))
|
|
}
|
|
|
|
var bytes uint64
|
|
|
|
node.Content = make([]backend.ID, len(blobs))
|
|
debug.Log("Archiver.Save", "checking size for file %s", node.path)
|
|
for i, blob := range blobs {
|
|
node.Content[i] = blob.ID
|
|
bytes += blob.Size
|
|
|
|
debug.Log("Archiver.Save", " adding blob %s", blob)
|
|
}
|
|
|
|
if bytes != node.Size {
|
|
return nil, fmt.Errorf("errors saving node %q: saved %d bytes, wanted %d bytes", node.path, bytes, node.Size)
|
|
}
|
|
|
|
debug.Log("Archiver.SaveFile", "SaveFile(%q): %v\n", node.path, blobs)
|
|
|
|
return blobs, nil
|
|
}
|
|
|
|
func (arch *Archiver) saveTree(p *Progress, t *Tree) (Blob, error) {
|
|
debug.Log("Archiver.saveTree", "saveTree(%v)\n", t)
|
|
var wg sync.WaitGroup
|
|
|
|
// add all blobs to global map
|
|
arch.m.Merge(t.Map)
|
|
|
|
// TODO: do all this in parallel
|
|
for _, node := range t.Nodes {
|
|
if node.tree != nil {
|
|
b, err := arch.saveTree(p, node.tree)
|
|
if err != nil {
|
|
return Blob{}, err
|
|
}
|
|
node.Subtree = b.ID
|
|
t.Map.Insert(b)
|
|
p.Report(Stat{Dirs: 1})
|
|
} else if node.Type == "file" {
|
|
if len(node.Content) > 0 {
|
|
removeContent := false
|
|
|
|
// check content
|
|
for _, id := range node.Content {
|
|
blob, err := t.Map.FindID(id)
|
|
if err != nil {
|
|
debug.Log("Archiver.saveTree", "unable to find storage id for data blob %v", id.Str())
|
|
arch.Error(node.path, nil, fmt.Errorf("unable to find storage id for data blob %v", id.Str()))
|
|
removeContent = true
|
|
t.Map.DeleteID(id)
|
|
arch.m.DeleteID(id)
|
|
continue
|
|
}
|
|
|
|
if ok, err := arch.s.Test(backend.Data, blob.Storage); !ok || err != nil {
|
|
debug.Log("Archiver.saveTree", "blob %v not in repository (error is %v)", blob, err)
|
|
arch.Error(node.path, nil, fmt.Errorf("blob %v not in repository (error is %v)", blob.Storage.Str(), err))
|
|
removeContent = true
|
|
t.Map.DeleteID(id)
|
|
arch.m.DeleteID(id)
|
|
}
|
|
}
|
|
|
|
if removeContent {
|
|
debug.Log("Archiver.saveTree", "removing content for %s", node.path)
|
|
node.Content = node.Content[:0]
|
|
}
|
|
}
|
|
|
|
if len(node.Content) == 0 {
|
|
// start goroutine
|
|
wg.Add(1)
|
|
go func(n *Node) {
|
|
defer wg.Done()
|
|
|
|
var blobs Blobs
|
|
blobs, n.err = arch.SaveFile(p, n)
|
|
for _, b := range blobs {
|
|
t.Map.Insert(b)
|
|
}
|
|
|
|
p.Report(Stat{Files: 1})
|
|
}(node)
|
|
}
|
|
}
|
|
}
|
|
|
|
wg.Wait()
|
|
|
|
usedIDs := backend.NewIDSet()
|
|
|
|
// check for invalid file nodes
|
|
for _, node := range t.Nodes {
|
|
if node.Type == "file" && node.Content == nil && node.err == nil {
|
|
return Blob{}, fmt.Errorf("node %v has empty content", node.Name)
|
|
}
|
|
|
|
// remember used hashes
|
|
if node.Type == "file" && node.Content != nil {
|
|
for _, id := range node.Content {
|
|
usedIDs.Insert(id)
|
|
}
|
|
}
|
|
|
|
if node.Type == "dir" && node.Subtree != nil {
|
|
usedIDs.Insert(node.Subtree)
|
|
}
|
|
|
|
if node.err != nil {
|
|
err := arch.Error(node.path, nil, node.err)
|
|
if err != nil {
|
|
return Blob{}, err
|
|
}
|
|
|
|
// save error message in node
|
|
node.Error = node.err.Error()
|
|
}
|
|
}
|
|
|
|
before := len(t.Map.IDs())
|
|
t.Map.Prune(usedIDs)
|
|
after := len(t.Map.IDs())
|
|
|
|
if before != after {
|
|
debug.Log("Archiver.saveTree", "pruned %d ids from map for tree %v\n", before-after, t)
|
|
}
|
|
|
|
blob, err := arch.SaveTreeJSON(t)
|
|
if err != nil {
|
|
return Blob{}, err
|
|
}
|
|
|
|
return blob, nil
|
|
}
|
|
|
|
func (arch *Archiver) fileWorker(wg *sync.WaitGroup, p *Progress, done <-chan struct{}, entCh <-chan pipe.Entry) {
|
|
defer wg.Done()
|
|
for {
|
|
select {
|
|
case e, ok := <-entCh:
|
|
if !ok {
|
|
// channel is closed
|
|
return
|
|
}
|
|
|
|
node, err := NodeFromFileInfo(e.Path, e.Info)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
|
|
if node.Type == "file" {
|
|
node.blobs, err = arch.SaveFile(p, node)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
}
|
|
|
|
e.Result <- node
|
|
p.Report(Stat{Files: 1})
|
|
case <-done:
|
|
// pipeline was cancelled
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
func (arch *Archiver) dirWorker(wg *sync.WaitGroup, p *Progress, done <-chan struct{}, dirCh <-chan pipe.Dir) {
|
|
defer wg.Done()
|
|
for {
|
|
select {
|
|
case dir, ok := <-dirCh:
|
|
if !ok {
|
|
// channel is closed
|
|
return
|
|
}
|
|
debug.Log("Archiver.DirWorker", "save dir %v\n", dir.Path)
|
|
|
|
tree := NewTree()
|
|
|
|
// wait for all content
|
|
for _, ch := range dir.Entries {
|
|
node := (<-ch).(*Node)
|
|
tree.Insert(node)
|
|
|
|
if node.Type == "dir" {
|
|
debug.Log("Archiver.DirWorker", "got tree node for %s: %v", node.path, node.blobs)
|
|
}
|
|
|
|
for _, blob := range node.blobs {
|
|
tree.Map.Insert(blob)
|
|
arch.m.Insert(blob)
|
|
}
|
|
}
|
|
|
|
node, err := NodeFromFileInfo(dir.Path, dir.Info)
|
|
if err != nil {
|
|
node.Error = err.Error()
|
|
dir.Result <- node
|
|
continue
|
|
}
|
|
|
|
blob, err := arch.SaveTreeJSON(tree)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
debug.Log("Archiver.DirWorker", "save tree for %s: %v", dir.Path, blob)
|
|
|
|
node.Subtree = blob.ID
|
|
node.blobs = Blobs{blob}
|
|
|
|
dir.Result <- node
|
|
p.Report(Stat{Dirs: 1})
|
|
case <-done:
|
|
// pipeline was cancelled
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
func compareWithOldTree(newCh <-chan interface{}, oldCh <-chan WalkTreeJob, outCh chan<- interface{}) {
|
|
debug.Log("Archiver.compareWithOldTree", "start")
|
|
defer func() {
|
|
debug.Log("Archiver.compareWithOldTree", "done")
|
|
}()
|
|
for {
|
|
debug.Log("Archiver.compareWithOldTree", "waiting for new job")
|
|
newJob, ok := <-newCh
|
|
if !ok {
|
|
// channel is closed
|
|
return
|
|
}
|
|
|
|
debug.Log("Archiver.compareWithOldTree", "received new job %v", newJob)
|
|
oldJob, ok := <-oldCh
|
|
if !ok {
|
|
// channel is closed
|
|
return
|
|
}
|
|
|
|
debug.Log("Archiver.compareWithOldTree", "received old job %v", oldJob)
|
|
|
|
outCh <- newJob
|
|
}
|
|
}
|
|
|
|
func (arch *Archiver) Snapshot(p *Progress, paths []string, parentSnapshot backend.ID) (*Snapshot, backend.ID, error) {
|
|
debug.Log("Archiver.Snapshot", "start for %v", paths)
|
|
|
|
debug.Break("Archiver.Snapshot")
|
|
sort.Strings(paths)
|
|
|
|
p.Start()
|
|
defer p.Done()
|
|
|
|
sn, err := NewSnapshot(paths)
|
|
if err != nil {
|
|
return nil, nil, err
|
|
}
|
|
|
|
// load parent snapshot
|
|
// var oldRoot backend.ID
|
|
// if parentSnapshot != nil {
|
|
// sn.Parent = parentSnapshot
|
|
// parentSn, err := LoadSnapshot(arch.s, parentSnapshot)
|
|
// if err != nil {
|
|
// return nil, nil, err
|
|
// }
|
|
// oldRoot = parentSn.Tree.Storage
|
|
// }
|
|
|
|
// signal the whole pipeline to stop
|
|
done := make(chan struct{})
|
|
|
|
// if we have an old root, start walker and comparer
|
|
// oldTreeCh := make(chan WalkTreeJob)
|
|
// if oldRoot != nil {
|
|
// // start walking the old tree
|
|
// debug.Log("Archiver.Snapshot", "start comparer for old root %v", oldRoot.Str())
|
|
// go WalkTree(arch.s, oldRoot, done, oldTreeCh)
|
|
// }
|
|
|
|
var wg sync.WaitGroup
|
|
entCh := make(chan pipe.Entry)
|
|
dirCh := make(chan pipe.Dir)
|
|
jobsCh := make(chan interface{})
|
|
|
|
// split
|
|
wg.Add(1)
|
|
go func() {
|
|
pipe.Split(jobsCh, dirCh, entCh)
|
|
close(dirCh)
|
|
close(entCh)
|
|
wg.Done()
|
|
}()
|
|
|
|
// run workers
|
|
for i := 0; i < maxConcurrency; i++ {
|
|
wg.Add(2)
|
|
go arch.fileWorker(&wg, p, done, entCh)
|
|
go arch.dirWorker(&wg, p, done, dirCh)
|
|
}
|
|
|
|
// start walker
|
|
resCh, err := pipe.Walk(paths, done, jobsCh)
|
|
if err != nil {
|
|
close(done)
|
|
|
|
debug.Log("Archiver.Snapshot", "pipe.Walke returned error %v", err)
|
|
return nil, nil, err
|
|
}
|
|
|
|
// wait for all workers to terminate
|
|
debug.Log("Archiver.Snapshot", "wait for workers")
|
|
wg.Wait()
|
|
|
|
debug.Log("Archiver.Snapshot", "workers terminated")
|
|
|
|
// add the top-level tree
|
|
tree := NewTree()
|
|
root := (<-resCh).(pipe.Dir)
|
|
for i := 0; i < len(paths); i++ {
|
|
node := (<-root.Entries[i]).(*Node)
|
|
|
|
debug.Log("Archiver.Snapshot", "got toplevel node %v", node)
|
|
|
|
tree.Insert(node)
|
|
for _, blob := range node.blobs {
|
|
blob = arch.m.Insert(blob)
|
|
tree.Map.Insert(blob)
|
|
}
|
|
}
|
|
|
|
tb, err := arch.SaveTreeJSON(tree)
|
|
if err != nil {
|
|
return nil, nil, err
|
|
}
|
|
|
|
sn.Tree = tb
|
|
|
|
// save snapshot
|
|
blob, err := arch.s.SaveJSON(backend.Snapshot, sn)
|
|
if err != nil {
|
|
return nil, nil, err
|
|
}
|
|
|
|
return sn, blob.Storage, nil
|
|
}
|
|
|
|
// isFile reports whether fi describes a regular file, i.e. none of the
// os.ModeType bits and not os.ModeCharDevice is set in its mode.
//
// A nil fi yields false. filepath.Walk passes a nil FileInfo to its callback
// when the file could not be stat'ed, so callers such as Scan may hand nil
// in here.
func isFile(fi os.FileInfo) bool {
	if fi == nil {
		return false
	}

	return fi.Mode()&(os.ModeType|os.ModeCharDevice) == 0
}
|
|
|
|
func Scan(dirs []string, p *Progress) (Stat, error) {
|
|
p.Start()
|
|
defer p.Done()
|
|
|
|
var stat Stat
|
|
|
|
for _, dir := range dirs {
|
|
err := filepath.Walk(dir, func(str string, fi os.FileInfo, err error) error {
|
|
s := Stat{}
|
|
if isFile(fi) {
|
|
s.Files++
|
|
s.Bytes += uint64(fi.Size())
|
|
} else if fi.IsDir() {
|
|
s.Dirs++
|
|
}
|
|
|
|
p.Report(s)
|
|
stat.Add(s)
|
|
|
|
// TODO: handle error?
|
|
return nil
|
|
})
|
|
|
|
if err != nil {
|
|
return Stat{}, err
|
|
}
|
|
}
|
|
|
|
return stat, nil
|
|
}
|