restic/internal/repository/repository_internal_test.go

532 lines
12 KiB
Go

package repository
import (
"bytes"
"context"
"encoding/json"
"io"
"math/rand"
"sort"
"strings"
"testing"
"github.com/cenkalti/backoff/v4"
"github.com/google/go-cmp/cmp"
"github.com/klauspost/compress/zstd"
"github.com/restic/restic/internal/backend"
"github.com/restic/restic/internal/crypto"
"github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/restic"
rtest "github.com/restic/restic/internal/test"
)
type mapcache map[backend.Handle]bool
func (c mapcache) Has(h backend.Handle) bool { return c[h] }
func TestSortCachedPacksFirst(t *testing.T) {
var (
blobs, sorted [100]restic.PackedBlob
cache = make(mapcache)
r = rand.New(rand.NewSource(1261))
)
for i := 0; i < len(blobs); i++ {
var id restic.ID
r.Read(id[:])
blobs[i] = restic.PackedBlob{PackID: id}
if i%3 == 0 {
h := backend.Handle{Name: id.String(), Type: backend.PackFile}
cache[h] = true
}
}
copy(sorted[:], blobs[:])
sort.SliceStable(sorted[:], func(i, j int) bool {
hi := backend.Handle{Type: backend.PackFile, Name: sorted[i].PackID.String()}
hj := backend.Handle{Type: backend.PackFile, Name: sorted[j].PackID.String()}
return cache.Has(hi) && !cache.Has(hj)
})
sortCachedPacksFirst(cache, blobs[:])
rtest.Equals(t, sorted, blobs)
}
func BenchmarkSortCachedPacksFirst(b *testing.B) {
const nblobs = 512 // Corresponds to a file of ca. 2GB.
var (
blobs [nblobs]restic.PackedBlob
cache = make(mapcache)
r = rand.New(rand.NewSource(1261))
)
for i := 0; i < nblobs; i++ {
var id restic.ID
r.Read(id[:])
blobs[i] = restic.PackedBlob{PackID: id}
if i%3 == 0 {
h := backend.Handle{Name: id.String(), Type: backend.PackFile}
cache[h] = true
}
}
var cpy [nblobs]restic.PackedBlob
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
copy(cpy[:], blobs[:])
sortCachedPacksFirst(cache, cpy[:])
}
}
// buildPackfileWithoutHeader returns a manually built pack file without a header.
func buildPackfileWithoutHeader(blobSizes []int, key *crypto.Key, compress bool) (blobs []restic.Blob, packfile []byte) {
opts := []zstd.EOption{
// Set the compression level configured.
zstd.WithEncoderLevel(zstd.SpeedDefault),
// Disable CRC, we have enough checks in place, makes the
// compressed data four bytes shorter.
zstd.WithEncoderCRC(false),
// Set a window of 512kbyte, so we have good lookbehind for usual
// blob sizes.
zstd.WithWindowSize(512 * 1024),
}
enc, err := zstd.NewWriter(nil, opts...)
if err != nil {
panic(err)
}
var offset uint
for i, size := range blobSizes {
plaintext := rtest.Random(800+i, size)
id := restic.Hash(plaintext)
uncompressedLength := uint(0)
if compress {
uncompressedLength = uint(len(plaintext))
plaintext = enc.EncodeAll(plaintext, nil)
}
// we use a deterministic nonce here so the whole process is
// deterministic, last byte is the blob index
var nonce = []byte{
0x15, 0x98, 0xc0, 0xf7, 0xb9, 0x65, 0x97, 0x74,
0x12, 0xdc, 0xd3, 0x62, 0xa9, 0x6e, 0x20, byte(i),
}
before := len(packfile)
packfile = append(packfile, nonce...)
packfile = key.Seal(packfile, nonce, plaintext, nil)
after := len(packfile)
ciphertextLength := after - before
blobs = append(blobs, restic.Blob{
BlobHandle: restic.BlobHandle{
Type: restic.DataBlob,
ID: id,
},
Length: uint(ciphertextLength),
UncompressedLength: uncompressedLength,
Offset: offset,
})
offset = uint(len(packfile))
}
return blobs, packfile
}
func TestStreamPack(t *testing.T) {
TestAllVersions(t, testStreamPack)
}
func testStreamPack(t *testing.T, version uint) {
dec, err := zstd.NewReader(nil)
if err != nil {
panic(dec)
}
defer dec.Close()
// always use the same key for deterministic output
key := testKey(t)
blobSizes := []int{
5522811,
10,
5231,
18812,
123123,
13522811,
12301,
892242,
28616,
13351,
252287,
188883,
3522811,
18883,
}
var compress bool
switch version {
case 1:
compress = false
case 2:
compress = true
default:
t.Fatal("test does not support repository version", version)
}
packfileBlobs, packfile := buildPackfileWithoutHeader(blobSizes, &key, compress)
loadCalls := 0
shortFirstLoad := false
loadBytes := func(length int, offset int64) []byte {
data := packfile
if offset > int64(len(data)) {
offset = 0
length = 0
}
data = data[offset:]
if length > len(data) {
length = len(data)
}
if shortFirstLoad {
length /= 2
shortFirstLoad = false
}
return data[:length]
}
load := func(ctx context.Context, h backend.Handle, length int, offset int64, fn func(rd io.Reader) error) error {
data := loadBytes(length, offset)
if shortFirstLoad {
data = data[:len(data)/2]
shortFirstLoad = false
}
loadCalls++
err := fn(bytes.NewReader(data))
if err == nil {
return nil
}
var permanent *backoff.PermanentError
if errors.As(err, &permanent) {
return err
}
// retry loading once
return fn(bytes.NewReader(loadBytes(length, offset)))
}
// first, test regular usage
t.Run("regular", func(t *testing.T) {
tests := []struct {
blobs []restic.Blob
calls int
shortFirstLoad bool
}{
{packfileBlobs[1:2], 1, false},
{packfileBlobs[2:5], 1, false},
{packfileBlobs[2:8], 1, false},
{[]restic.Blob{
packfileBlobs[0],
packfileBlobs[4],
packfileBlobs[2],
}, 1, false},
{[]restic.Blob{
packfileBlobs[0],
packfileBlobs[len(packfileBlobs)-1],
}, 2, false},
{packfileBlobs[:], 1, true},
}
for _, test := range tests {
t.Run("", func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
gotBlobs := make(map[restic.ID]int)
handleBlob := func(blob restic.BlobHandle, buf []byte, err error) error {
gotBlobs[blob.ID]++
id := restic.Hash(buf)
if !id.Equal(blob.ID) {
t.Fatalf("wrong id %v for blob %s returned", id, blob.ID)
}
return err
}
wantBlobs := make(map[restic.ID]int)
for _, blob := range test.blobs {
wantBlobs[blob.ID] = 1
}
loadCalls = 0
shortFirstLoad = test.shortFirstLoad
err := streamPack(ctx, load, nil, dec, &key, restic.ID{}, test.blobs, handleBlob)
if err != nil {
t.Fatal(err)
}
if !cmp.Equal(wantBlobs, gotBlobs) {
t.Fatal(cmp.Diff(wantBlobs, gotBlobs))
}
rtest.Equals(t, test.calls, loadCalls)
})
}
})
shortFirstLoad = false
// next, test invalid uses, which should return an error
t.Run("invalid", func(t *testing.T) {
tests := []struct {
blobs []restic.Blob
err string
}{
{
// pass one blob several times
blobs: []restic.Blob{
packfileBlobs[3],
packfileBlobs[8],
packfileBlobs[3],
packfileBlobs[4],
},
err: "overlapping blobs in pack",
},
{
// pass something that's not a valid blob in the current pack file
blobs: []restic.Blob{
{
Offset: 123,
Length: 20000,
},
},
err: "ciphertext verification failed",
},
{
// pass a blob that's too small
blobs: []restic.Blob{
{
Offset: 123,
Length: 10,
},
},
err: "invalid blob length",
},
}
for _, test := range tests {
t.Run("", func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
handleBlob := func(blob restic.BlobHandle, buf []byte, err error) error {
return err
}
err := streamPack(ctx, load, nil, dec, &key, restic.ID{}, test.blobs, handleBlob)
if err == nil {
t.Fatalf("wanted error %v, got nil", test.err)
}
if !strings.Contains(err.Error(), test.err) {
t.Fatalf("wrong error returned, it should contain %q but was %q", test.err, err)
}
})
}
})
}
func TestBlobVerification(t *testing.T) {
repo := TestRepository(t)
type DamageType string
const (
damageData DamageType = "data"
damageCompressed DamageType = "compressed"
damageCiphertext DamageType = "ciphertext"
)
for _, test := range []struct {
damage DamageType
msg string
}{
{"", ""},
{damageData, "hash mismatch"},
{damageCompressed, "decompression failed"},
{damageCiphertext, "ciphertext verification failed"},
} {
plaintext := rtest.Random(800, 1234)
id := restic.Hash(plaintext)
if test.damage == damageData {
plaintext[42] ^= 0x42
}
uncompressedLength := uint(len(plaintext))
plaintext = repo.getZstdEncoder().EncodeAll(plaintext, nil)
if test.damage == damageCompressed {
plaintext = plaintext[:len(plaintext)-8]
}
nonce := crypto.NewRandomNonce()
ciphertext := append([]byte{}, nonce...)
ciphertext = repo.Key().Seal(ciphertext, nonce, plaintext, nil)
if test.damage == damageCiphertext {
ciphertext[42] ^= 0x42
}
err := repo.verifyCiphertext(ciphertext, int(uncompressedLength), id)
if test.msg == "" {
rtest.Assert(t, err == nil, "expected no error, got %v", err)
} else {
rtest.Assert(t, strings.Contains(err.Error(), test.msg), "expected error to contain %q, got %q", test.msg, err)
}
}
}
func TestUnpackedVerification(t *testing.T) {
repo := TestRepository(t)
type DamageType string
const (
damageData DamageType = "data"
damageCompressed DamageType = "compressed"
damageCiphertext DamageType = "ciphertext"
)
for _, test := range []struct {
damage DamageType
msg string
}{
{"", ""},
{damageData, "data mismatch"},
{damageCompressed, "decompression failed"},
{damageCiphertext, "ciphertext verification failed"},
} {
plaintext := rtest.Random(800, 1234)
orig := append([]byte{}, plaintext...)
if test.damage == damageData {
plaintext[42] ^= 0x42
}
compressed := []byte{2}
compressed = repo.getZstdEncoder().EncodeAll(plaintext, compressed)
if test.damage == damageCompressed {
compressed = compressed[:len(compressed)-8]
}
nonce := crypto.NewRandomNonce()
ciphertext := append([]byte{}, nonce...)
ciphertext = repo.Key().Seal(ciphertext, nonce, compressed, nil)
if test.damage == damageCiphertext {
ciphertext[42] ^= 0x42
}
err := repo.verifyUnpacked(ciphertext, restic.IndexFile, orig)
if test.msg == "" {
rtest.Assert(t, err == nil, "expected no error, got %v", err)
} else {
rtest.Assert(t, strings.Contains(err.Error(), test.msg), "expected error to contain %q, got %q", test.msg, err)
}
}
}
func testKey(t *testing.T) crypto.Key {
const jsonKey = `{"mac":{"k":"eQenuI8adktfzZMuC8rwdA==","r":"k8cfAly2qQSky48CQK7SBA=="},"encrypt":"MKO9gZnRiQFl8mDUurSDa9NMjiu9MUifUrODTHS05wo="}`
var key crypto.Key
err := json.Unmarshal([]byte(jsonKey), &key)
if err != nil {
t.Fatal(err)
}
return key
}
func TestStreamPackFallback(t *testing.T) {
dec, err := zstd.NewReader(nil)
if err != nil {
panic(dec)
}
defer dec.Close()
test := func(t *testing.T, failLoad bool) {
key := testKey(t)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
plaintext := rtest.Random(800, 42)
blobID := restic.Hash(plaintext)
blobs := []restic.Blob{
{
Length: uint(crypto.CiphertextLength(len(plaintext))),
Offset: 0,
BlobHandle: restic.BlobHandle{
ID: blobID,
Type: restic.DataBlob,
},
},
}
var loadPack backendLoadFn
if failLoad {
loadPack = func(ctx context.Context, h backend.Handle, length int, offset int64, fn func(rd io.Reader) error) error {
return errors.New("load error")
}
} else {
loadPack = func(ctx context.Context, h backend.Handle, length int, offset int64, fn func(rd io.Reader) error) error {
// just return an empty array to provoke an error
data := make([]byte, length)
return fn(bytes.NewReader(data))
}
}
loadBlob := func(ctx context.Context, t restic.BlobType, id restic.ID, buf []byte) ([]byte, error) {
if id == blobID {
return plaintext, nil
}
return nil, errors.New("unknown blob")
}
blobOK := false
handleBlob := func(blob restic.BlobHandle, buf []byte, err error) error {
rtest.OK(t, err)
rtest.Equals(t, blobID, blob.ID)
rtest.Equals(t, plaintext, buf)
blobOK = true
return err
}
err := streamPack(ctx, loadPack, loadBlob, dec, &key, restic.ID{}, blobs, handleBlob)
rtest.OK(t, err)
rtest.Assert(t, blobOK, "blob failed to load")
}
t.Run("corrupted blob", func(t *testing.T) {
test(t, false)
})
// test fallback for failed pack loading
t.Run("failed load", func(t *testing.T) {
test(t, true)
})
}