restic/archiver_test.go

356 lines
8.3 KiB
Go
Raw Normal View History

2014-12-05 20:45:49 +00:00
package restic_test
2014-11-16 20:41:05 +00:00
import (
"bytes"
"crypto/sha256"
2014-11-16 20:41:05 +00:00
"io"
"math"
2014-11-16 20:41:05 +00:00
"testing"
2015-07-07 22:55:58 +00:00
"github.com/restic/chunker"
2014-12-05 20:45:49 +00:00
"github.com/restic/restic"
2015-02-17 21:39:44 +00:00
"github.com/restic/restic/backend"
"github.com/restic/restic/checker"
"github.com/restic/restic/crypto"
"github.com/restic/restic/pack"
"github.com/restic/restic/repository"
2015-04-09 19:15:48 +00:00
. "github.com/restic/restic/test"
2014-11-16 20:41:05 +00:00
)
// testPol is the chunker.Pol value handed to every chunker.New call in this
// file, so all chunkers created by these tests and benchmarks use the same one.
var testPol = chunker.Pol(0x3DA3358B4DC173)
2015-02-17 19:02:43 +00:00
// Rdr is the input type of the chunk/encrypt benchmark helpers. It bundles
// offset-based reads (io.ReaderAt) with sequential, seekable reads
// (io.ReadSeeker).
type Rdr interface {
	io.ReaderAt
	io.ReadSeeker
}
// chunkedData pairs a data buffer with the chunks a chunker produced from it.
// Values are created and filled by getRandomData.
type chunkedData struct {
// buf holds the raw bytes the chunks were cut from.
buf []byte
// chunks lists the chunks returned by the chunker for buf, in the order they
// were returned.
chunks []*chunker.Chunk
}
func benchmarkChunkEncrypt(b testing.TB, buf, buf2 []byte, rd Rdr, key *crypto.Key) {
rd.Seek(0, 0)
ch := chunker.New(rd, testPol, sha256.New())
for {
chunk, err := ch.Next()
if err == io.EOF {
break
}
2015-04-09 19:15:48 +00:00
OK(b, err)
2015-02-17 19:02:43 +00:00
// reduce length of buf
buf = buf[:chunk.Length]
n, err := io.ReadFull(chunk.Reader(rd), buf)
2015-04-09 19:15:48 +00:00
OK(b, err)
Assert(b, uint(n) == chunk.Length, "invalid length: got %d, expected %d", n, chunk.Length)
_, err = crypto.Encrypt(key, buf2, buf)
2015-04-09 19:15:48 +00:00
OK(b, err)
}
}
2014-11-16 20:41:05 +00:00
func BenchmarkChunkEncrypt(b *testing.B) {
repo := SetupRepo()
defer TeardownRepo(repo)
data := Random(23, 10<<20) // 10MiB
rd := bytes.NewReader(data)
2014-11-16 20:41:05 +00:00
2015-05-04 22:14:07 +00:00
buf := make([]byte, chunker.MaxSize)
buf2 := make([]byte, chunker.MaxSize)
2015-02-17 19:02:43 +00:00
2014-11-16 20:41:05 +00:00
b.ResetTimer()
b.SetBytes(int64(len(data)))
for i := 0; i < b.N; i++ {
benchmarkChunkEncrypt(b, buf, buf2, rd, repo.Key())
}
2015-02-17 19:02:43 +00:00
}
func benchmarkChunkEncryptP(b *testing.PB, buf []byte, rd Rdr, key *crypto.Key) {
ch := chunker.New(rd, testPol, sha256.New())
2014-11-16 20:41:05 +00:00
for {
chunk, err := ch.Next()
if err == io.EOF {
break
}
2014-11-16 20:41:05 +00:00
// reduce length of chunkBuf
2015-02-17 19:02:43 +00:00
buf = buf[:chunk.Length]
io.ReadFull(chunk.Reader(rd), buf)
crypto.Encrypt(key, buf, buf)
}
}
func BenchmarkChunkEncryptParallel(b *testing.B) {
repo := SetupRepo()
defer TeardownRepo(repo)
data := Random(23, 10<<20) // 10MiB
2015-05-04 22:14:07 +00:00
buf := make([]byte, chunker.MaxSize)
2015-02-17 19:02:43 +00:00
b.ResetTimer()
b.SetBytes(int64(len(data)))
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
rd := bytes.NewReader(data)
benchmarkChunkEncryptP(pb, buf, rd, repo.Key())
2014-11-16 20:41:05 +00:00
}
})
2014-11-16 20:41:05 +00:00
}
func archiveDirectory(b testing.TB) {
repo := SetupRepo()
defer TeardownRepo(repo)
2015-05-09 11:32:52 +00:00
arch := restic.NewArchiver(repo)
2015-06-28 11:15:35 +00:00
_, id, err := arch.Snapshot(nil, []string{BenchArchiveDirectory}, nil)
2015-04-30 01:41:51 +00:00
OK(b, err)
b.Logf("snapshot archived as %v", id)
}
2015-02-17 21:39:44 +00:00
func TestArchiveDirectory(t *testing.T) {
2015-06-28 11:15:35 +00:00
if BenchArchiveDirectory == "" {
t.Skip("benchdir not set, skipping TestArchiveDirectory")
}
archiveDirectory(t)
}
func BenchmarkArchiveDirectory(b *testing.B) {
2015-06-28 11:15:35 +00:00
if BenchArchiveDirectory == "" {
b.Skip("benchdir not set, skipping BenchmarkArchiveDirectory")
}
2015-05-01 20:58:50 +00:00
for i := 0; i < b.N; i++ {
archiveDirectory(b)
}
2015-02-17 21:39:44 +00:00
}
func archiveWithDedup(t testing.TB) {
repo := SetupRepo()
defer TeardownRepo(repo)
2015-06-28 11:15:35 +00:00
if BenchArchiveDirectory == "" {
t.Skip("benchdir not set, skipping TestArchiverDedup")
2015-02-17 22:40:37 +00:00
}
var cnt struct {
before, after, after2 struct {
packs, dataBlobs, treeBlobs uint
}
}
2015-02-17 21:39:44 +00:00
// archive a few files
2015-06-28 11:15:35 +00:00
sn := SnapshotDir(t, repo, BenchArchiveDirectory, nil)
2015-03-09 21:58:17 +00:00
t.Logf("archived snapshot %v", sn.ID().Str())
2015-02-17 21:39:44 +00:00
// get archive stats
2015-05-09 11:32:52 +00:00
cnt.before.packs = repo.Count(backend.Data)
cnt.before.dataBlobs = repo.Index().Count(pack.Data)
cnt.before.treeBlobs = repo.Index().Count(pack.Tree)
t.Logf("packs %v, data blobs %v, tree blobs %v",
cnt.before.packs, cnt.before.dataBlobs, cnt.before.treeBlobs)
2015-02-17 21:39:44 +00:00
2015-03-09 21:58:17 +00:00
// archive the same files again, without parent snapshot
2015-06-28 11:15:35 +00:00
sn2 := SnapshotDir(t, repo, BenchArchiveDirectory, nil)
2015-03-09 21:58:17 +00:00
t.Logf("archived snapshot %v", sn2.ID().Str())
2015-02-17 21:39:44 +00:00
// get archive stats again
2015-05-09 11:32:52 +00:00
cnt.after.packs = repo.Count(backend.Data)
cnt.after.dataBlobs = repo.Index().Count(pack.Data)
cnt.after.treeBlobs = repo.Index().Count(pack.Tree)
t.Logf("packs %v, data blobs %v, tree blobs %v",
cnt.after.packs, cnt.after.dataBlobs, cnt.after.treeBlobs)
// if there are more data blobs, something is wrong
if cnt.after.dataBlobs > cnt.before.dataBlobs {
t.Fatalf("TestArchiverDedup: too many data blobs in repository: before %d, after %d",
cnt.before.dataBlobs, cnt.after.dataBlobs)
}
2015-03-09 21:58:17 +00:00
// archive the same files again, with a parent snapshot
2015-06-28 11:15:35 +00:00
sn3 := SnapshotDir(t, repo, BenchArchiveDirectory, sn2.ID())
2015-03-09 21:58:17 +00:00
t.Logf("archived snapshot %v, parent %v", sn3.ID().Str(), sn2.ID().Str())
// get archive stats again
2015-05-09 11:32:52 +00:00
cnt.after2.packs = repo.Count(backend.Data)
cnt.after2.dataBlobs = repo.Index().Count(pack.Data)
cnt.after2.treeBlobs = repo.Index().Count(pack.Tree)
t.Logf("packs %v, data blobs %v, tree blobs %v",
cnt.after2.packs, cnt.after2.dataBlobs, cnt.after2.treeBlobs)
// if there are more data blobs, something is wrong
if cnt.after2.dataBlobs > cnt.before.dataBlobs {
t.Fatalf("TestArchiverDedup: too many data blobs in repository: before %d, after %d",
cnt.before.dataBlobs, cnt.after2.dataBlobs)
}
2015-02-17 21:39:44 +00:00
}
2015-02-21 23:09:57 +00:00
// TestArchiveDedup runs the deduplication check implemented in
// archiveWithDedup as a regular test.
func TestArchiveDedup(t *testing.T) {
archiveWithDedup(t)
}
2015-02-21 23:09:57 +00:00
func BenchmarkLoadTree(t *testing.B) {
repo := SetupRepo()
defer TeardownRepo(repo)
2015-06-28 11:15:35 +00:00
if BenchArchiveDirectory == "" {
t.Skip("benchdir not set, skipping TestArchiverDedup")
2015-02-21 23:09:57 +00:00
}
// archive a few files
arch := restic.NewArchiver(repo)
2015-06-28 11:15:35 +00:00
sn, _, err := arch.Snapshot(nil, []string{BenchArchiveDirectory}, nil)
2015-04-09 19:15:48 +00:00
OK(t, err)
2015-02-21 23:09:57 +00:00
t.Logf("archived snapshot %v", sn.ID())
2015-03-28 10:50:23 +00:00
list := make([]backend.ID, 0, 10)
done := make(chan struct{})
2015-10-12 20:34:12 +00:00
for _, idx := range repo.Index().All() {
for blob := range idx.Each(done) {
if blob.Type != pack.Tree {
continue
}
list = append(list, blob.ID)
if len(list) == cap(list) {
close(done)
break
}
2015-03-28 10:50:23 +00:00
}
}
2015-02-21 23:09:57 +00:00
// start benchmark
t.ResetTimer()
for i := 0; i < t.N; i++ {
for _, id := range list {
_, err := restic.LoadTree(repo, id)
2015-04-09 19:15:48 +00:00
OK(t, err)
2015-03-28 10:50:23 +00:00
}
2015-02-21 23:09:57 +00:00
}
}
// TestParallelSaveWithHighDuplication saves several identical chunks
// concurrently and afterwards checks that there are no unreferenced packs in
// the repository. See also #292 and #358.
// The combination of high duplication and high concurrency should provoke any
// issues leading to unreferenced packs.
func TestParallelSaveWithHighDuplication(t *testing.T) {
	repo := SetupRepo()
	defer TeardownRepo(repo)

	// For every seed a pseudo-random 32MiB blob is generated and split into
	// chunks. During the test all chunks of all blobs are processed in
	// parallel goroutines. To increase duplication, each chunk is processed
	// <duplication> times. Concurrency can be limited by changing
	// <maxParallel>.
	// Note: seeds 5, 3, 66, 4, 12 produce the most chunks (descending)
	seeds := []int{5, 3, 66, 4, 12}
	maxParallel := math.MaxInt32
	duplication := 15

	arch := restic.NewArchiver(repo)
	data := getRandomData(seeds)

	// slots is a counting semaphore: a goroutine occupies one slot while it
	// is inside arch.Save, so at most maxParallel saves run at the same time.
	slots := make(chan struct{}, maxParallel)

	// results collects one channel per started goroutine; each delivers the
	// outcome of exactly one Save call.
	var results []<-chan error

	// saveChunk stores chunk c (cut from *buf) as a data blob, using the
	// chunk's digest as its ID, and sends the result to out.
	saveChunk := func(buf *[]byte, c *chunker.Chunk, out chan<- error) {
		slots <- struct{}{}

		var id backend.ID
		copy(id[:], c.Digest)
		err := arch.Save(pack.Data, id, c.Length, c.Reader(bytes.NewReader(*buf)))

		<-slots
		out <- err
	}

	for _, d := range data {
		for _, c := range d.chunks {
			for n := duplication; n > 0; n-- {
				out := make(chan error)
				results = append(results, out)

				go saveChunk(&d.buf, c, out)
			}
		}
	}

	// wait for all goroutines and check their results
	for _, res := range results {
		OK(t, <-res)
	}

	OK(t, repo.Flush())
	OK(t, repo.SaveIndex())

	chkr := createAndInitChecker(t, repo)
	assertNoUnreferencedPacks(t, chkr)
}
// getRandomData generates, for every entry of seeds, a 32MiB buffer with
// Random(seed, ...) and splits it into chunks with a chunker configured with
// testPol. The buffers are generated and chunked concurrently, one goroutine
// per seed; the function returns once all of them are finished.
//
// The result has one element per seed, in the order of seeds. A chunker error
// other than io.EOF causes a panic.
func getRandomData(seeds []int) []*chunkedData {
	chunks := make([]*chunkedData, 0, len(seeds))
	sem := make(chan struct{}, len(seeds))

	// Range over the VALUES of seeds. Ranging with a single variable yields
	// the indices 0..len(seeds)-1, which would silently replace the seeds
	// selected by the caller.
	for _, seed := range seeds {
		c := &chunkedData{}
		chunks = append(chunks, c)

		go func(seed int, data *chunkedData) {
			data.buf = Random(seed, 32*1024*1024)
			ch := chunker.New(bytes.NewReader(data.buf), testPol, sha256.New())

			for {
				c, err := ch.Next()
				if err == io.EOF {
					break
				}
				if err != nil {
					// No testing.TB is available here. Fail loudly instead
					// of looping on an error and collecting unusable chunks.
					panic(err)
				}

				data.chunks = append(data.chunks, c)
			}

			// signal completion; receiving this in the caller also orders the
			// writes to data before the caller's reads
			sem <- struct{}{}
		}(seed, c)
	}

	// wait for one completion signal per started goroutine
	for i := 0; i < len(seeds); i++ {
		<-sem
	}

	return chunks
}
// createAndInitChecker returns a new checker for repo after loading the index
// into it. Errors returned by LoadIndex abort the test via t.Fatalf; hints
// are reported with t.Errorf, so the test fails but the checker is still
// returned.
func createAndInitChecker(t *testing.T, repo *repository.Repository) *checker.Checker {
	chkr := checker.New(repo)

	hints, errs := chkr.LoadIndex()

	if n := len(errs); n != 0 {
		t.Fatalf("expected no errors, got %v: %v", n, errs)
	}

	if n := len(hints); n != 0 {
		t.Errorf("expected no hints, got %v: %v", n, hints)
	}

	return chkr
}
// assertNoUnreferencedPacks runs chkr.Packs in a separate goroutine and
// passes every error it sends to OK, until the error channel is closed. The
// done channel given to Packs is closed when this function returns.
//
// NOTE(review): OK is assumed to fail the test for a non-nil error; that the
// errors concern unreferenced packs is taken from the function name, not from
// code visible here.
func assertNoUnreferencedPacks(t *testing.T, chkr *checker.Checker) {
	var (
		stop = make(chan struct{})
		errs = make(chan error)
	)
	defer close(stop)

	go chkr.Packs(errs, stop)

	for {
		err, more := <-errs
		if !more {
			return
		}

		OK(t, err)
	}
}