From 282c4379dbff36fa54de230cb7ee3007dbb50dd7 Mon Sep 17 00:00:00 2001 From: Florian Weingarten Date: Mon, 4 May 2015 15:07:21 -0400 Subject: [PATCH 1/7] Remove chunker.Reset() and chunker pool --- archiver.go | 5 ++--- archiver_test.go | 12 +++--------- chunker/chunker.go | 43 ++++++++++++++++------------------------- chunker/chunker_test.go | 30 +++++++--------------------- pools.go | 35 +++------------------------------ 5 files changed, 32 insertions(+), 93 deletions(-) diff --git a/archiver.go b/archiver.go index f77efb4ad..cc5d079b1 100644 --- a/archiver.go +++ b/archiver.go @@ -1,6 +1,7 @@ package restic import ( + "crypto/sha256" "encoding/json" "fmt" "io" @@ -183,10 +184,8 @@ func (arch *Archiver) SaveFile(p *Progress, node *Node) error { return err } - chnker := GetChunker("archiver.SaveFile") - chnker.Reset(file, arch.s.Config.ChunkerPolynomial) + chnker := chunker.New(file, arch.s.Config.ChunkerPolynomial, sha256.New()) resultChannels := [](<-chan saveResult){} - defer FreeChunker("archiver.SaveFile", chnker) for { chunk, err := chnker.Next() diff --git a/archiver_test.go b/archiver_test.go index df9b1fcfc..519f34839 100644 --- a/archiver_test.go +++ b/archiver_test.go @@ -2,6 +2,7 @@ package restic_test import ( "bytes" + "crypto/sha256" "flag" "io" "testing" @@ -25,9 +26,8 @@ type Rdr interface { } func benchmarkChunkEncrypt(b testing.TB, buf, buf2 []byte, rd Rdr, key *crypto.Key) { - ch := restic.GetChunker("BenchmarkChunkEncrypt") rd.Seek(0, 0) - ch.Reset(rd, testPol) + ch := chunker.New(rd, testPol, sha256.New()) for { chunk, err := ch.Next() @@ -47,8 +47,6 @@ func benchmarkChunkEncrypt(b testing.TB, buf, buf2 []byte, rd Rdr, key *crypto.K _, err = crypto.Encrypt(key, buf2, buf) OK(b, err) } - - restic.FreeChunker("BenchmarkChunkEncrypt", ch) } func BenchmarkChunkEncrypt(b *testing.B) { @@ -73,9 +71,7 @@ func BenchmarkChunkEncrypt(b *testing.B) { } func benchmarkChunkEncryptP(b *testing.PB, buf []byte, rd Rdr, key *crypto.Key) { - ch := 
restic.GetChunker("BenchmarkChunkEncryptP") - rd.Seek(0, 0) - ch.Reset(rd, testPol) + ch := chunker.New(rd, testPol, sha256.New()) for { chunk, err := ch.Next() @@ -88,8 +84,6 @@ func benchmarkChunkEncryptP(b *testing.PB, buf []byte, rd Rdr, key *crypto.Key) io.ReadFull(chunk.Reader(rd), buf) crypto.Encrypt(key, buf, buf) } - - restic.FreeChunker("BenchmarkChunkEncryptP", ch) } func BenchmarkChunkEncryptParallel(b *testing.B) { diff --git a/chunker/chunker.go b/chunker/chunker.go index 9e016e4dd..944321f75 100644 --- a/chunker/chunker.go +++ b/chunker/chunker.go @@ -17,6 +17,9 @@ const ( // aim to create chunks of 20 bits or about 1MiB on average. averageBits = 20 + // default buffer size + bufSize = 512 * KiB + // MinSize is the minimal size of a chunk. MinSize = 512 * KiB // MaxSize is the maximal size of a chunk. @@ -81,36 +84,33 @@ type Chunker struct { // New returns a new Chunker based on polynomial p that reads from data from rd // with bufsize and pass all data to hash along the way. -func New(rd io.Reader, p Pol, bufsize int, hash hash.Hash) *Chunker { +func New(rd io.Reader, pol Pol, h hash.Hash) *Chunker { c := &Chunker{ - buf: make([]byte, bufsize), - h: hash, + buf: make([]byte, bufSize), + h: h, + pol: pol, + rd: rd, } - c.Reset(rd, p) + + c.reset() + return c } -// Reset restarts a chunker so that it can be reused with a different -// polynomial and reader. 
-func (c *Chunker) Reset(rd io.Reader, p Pol) { - c.pol = p - c.polShift = uint(p.Deg() - 8) +func (c *Chunker) reset() { + c.polShift = uint(c.pol.Deg() - 8) c.fillTables() - c.rd = rd for i := 0; i < windowSize; i++ { c.window[i] = 0 } + c.closed = false c.digest = 0 c.wpos = 0 - c.pos = 0 - c.start = 0 c.count = 0 - - if p != 0 { - c.slide(1) - } + c.slide(1) + c.start = c.pos if c.h != nil { c.h.Reset() @@ -276,16 +276,7 @@ func (c *Chunker) Next() (*Chunk, error) { Digest: c.hashDigest(), } - if c.h != nil { - c.h.Reset() - } - - // reset chunker, but keep position - pos := c.pos - c.Reset(c.rd, c.pol) - c.pos = pos - c.start = pos - c.pre = MinSize - windowSize + c.reset() return chunk, nil } diff --git a/chunker/chunker_test.go b/chunker/chunker_test.go index 9df5b49ae..fbd692d7d 100644 --- a/chunker/chunker_test.go +++ b/chunker/chunker_test.go @@ -19,7 +19,6 @@ import ( ) var benchmarkFile = flag.String("bench.file", "", "read from this file for benchmark") -var testBufSize = flag.Int("test.bufsize", 256*1024, "use this buffer size for benchmark") func parseDigest(s string) []byte { d, err := hex.DecodeString(s) @@ -151,7 +150,7 @@ func getRandom(seed, count int) []byte { func TestChunker(t *testing.T) { // setup data source buf := getRandom(23, 32*1024*1024) - ch := chunker.New(bytes.NewReader(buf), testPol, *testBufSize, sha256.New()) + ch := chunker.New(bytes.NewReader(buf), testPol, sha256.New()) chunks := testWithData(t, ch, chunks1) // test reader @@ -178,7 +177,7 @@ func TestChunker(t *testing.T) { // setup nullbyte data source buf = bytes.Repeat([]byte{0}, len(chunks2)*chunker.MinSize) - ch = chunker.New(bytes.NewReader(buf), testPol, *testBufSize, sha256.New()) + ch = chunker.New(bytes.NewReader(buf), testPol, sha256.New()) testWithData(t, ch, chunks2) } @@ -194,7 +193,7 @@ func TestChunkerWithRandomPolynomial(t *testing.T) { t.Logf("generating random polynomial took %v", time.Since(start)) start = time.Now() - ch := 
chunker.New(bytes.NewReader(buf), p, *testBufSize, sha256.New()) + ch := chunker.New(bytes.NewReader(buf), p, sha256.New()) t.Logf("creating chunker took %v", time.Since(start)) // make sure that first chunk is different @@ -211,7 +210,7 @@ func TestChunkerWithRandomPolynomial(t *testing.T) { func TestChunkerWithoutHash(t *testing.T) { // setup data source buf := getRandom(23, 32*1024*1024) - ch := chunker.New(bytes.NewReader(buf), testPol, *testBufSize, nil) + ch := chunker.New(bytes.NewReader(buf), testPol, nil) chunks := testWithData(t, ch, chunks1) // test reader @@ -241,30 +240,17 @@ func TestChunkerWithoutHash(t *testing.T) { // setup nullbyte data source buf = bytes.Repeat([]byte{0}, len(chunks2)*chunker.MinSize) - ch = chunker.New(bytes.NewReader(buf), testPol, *testBufSize, sha256.New()) + ch = chunker.New(bytes.NewReader(buf), testPol, sha256.New()) testWithData(t, ch, chunks2) } -func TestChunkerReuse(t *testing.T) { - // test multiple uses of the same chunker - ch := chunker.New(nil, testPol, *testBufSize, sha256.New()) - buf := getRandom(23, 32*1024*1024) - - for i := 0; i < 4; i++ { - ch.Reset(bytes.NewReader(buf), testPol) - testWithData(t, ch, chunks1) - } -} - func benchmarkChunker(b *testing.B, hash hash.Hash) { var ( rd io.ReadSeeker size int ) - b.Logf("using bufsize %v", *testBufSize) - if *benchmarkFile != "" { b.Logf("using file %q for benchmark", *benchmarkFile) f, err := os.Open(*benchmarkFile) @@ -284,8 +270,6 @@ func benchmarkChunker(b *testing.B, hash hash.Hash) { rd = bytes.NewReader(getRandom(23, size)) } - ch := chunker.New(rd, testPol, *testBufSize, hash) - b.ResetTimer() b.SetBytes(int64(size)) @@ -294,7 +278,7 @@ func benchmarkChunker(b *testing.B, hash hash.Hash) { chunks = 0 rd.Seek(0, 0) - ch.Reset(rd, testPol) + ch := chunker.New(rd, testPol, hash) for { _, err := ch.Next() @@ -333,6 +317,6 @@ func BenchmarkNewChunker(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { - chunker.New(bytes.NewBuffer(nil), p, *testBufSize, 
nil) + chunker.New(bytes.NewBuffer(nil), p, nil) } } diff --git a/pools.go b/pools.go index 02a56d9eb..151752062 100644 --- a/pools.go +++ b/pools.go @@ -1,7 +1,6 @@ package restic import ( - "crypto/sha256" "sync" "github.com/restic/restic/chunker" @@ -23,7 +22,6 @@ type poolStats struct { const ( maxCiphertextSize = crypto.Extension + chunker.MaxSize - chunkerBufSize = 512 * chunker.KiB ) func (s *poolStats) Get(k string) { @@ -72,12 +70,10 @@ func newPoolStats() *poolStats { } var ( - chunkPool = sync.Pool{New: newChunkBuf} - chunkerPool = sync.Pool{New: newChunker} + chunkPool = sync.Pool{New: newChunkBuf} - chunkStats = newPoolStats() - nodeStats = newPoolStats() - chunkerStats = newPoolStats() + chunkStats = newPoolStats() + nodeStats = newPoolStats() ) func newChunkBuf() interface{} { @@ -89,15 +85,6 @@ func newChunkBuf() interface{} { return make([]byte, maxCiphertextSize) } -func newChunker() interface{} { - chunkStats.m.Lock() - defer chunkStats.m.Unlock() - chunkStats.new++ - - // create a new chunker with a nil reader and null polynomial - return chunker.New(nil, 0, chunkerBufSize, sha256.New()) -} - func GetChunkBuf(s string) []byte { chunkStats.Get(s) return chunkPool.Get().([]byte) @@ -108,16 +95,6 @@ func FreeChunkBuf(s string, buf []byte) { chunkPool.Put(buf) } -func GetChunker(s string) *chunker.Chunker { - chunkerStats.Get(s) - return chunkerPool.Get().(*chunker.Chunker) -} - -func FreeChunker(s string, ch *chunker.Chunker) { - chunkerStats.Put(s) - chunkerPool.Put(ch) -} - func PoolAlloc() { debug.Log("pools.PoolAlloc", "pool stats for chunk: new %d, get %d, put %d, diff %d, max %d\n", chunkStats.new, chunkStats.get, chunkStats.put, chunkStats.get-chunkStats.put, chunkStats.max) @@ -131,10 +108,4 @@ func PoolAlloc() { for k, v := range nodeStats.mget { debug.Log("pools.PoolAlloc", "pool stats for node[%s]: get %d, put %d, diff %d, max %d\n", k, v, nodeStats.mput[k], v-nodeStats.mput[k], nodeStats.mmax[k]) } - - debug.Log("pools.PoolAlloc", "pool 
stats for chunker: new %d, get %d, put %d, diff %d, max %d\n", - chunkerStats.new, chunkerStats.get, chunkerStats.put, chunkerStats.get-chunkerStats.put, chunkerStats.max) - for k, v := range chunkerStats.mget { - debug.Log("pools.PoolAlloc", "pool stats for chunker[%s]: get %d, put %d, diff %d, max %d\n", k, v, chunkerStats.mput[k], v-chunkerStats.mput[k], chunkerStats.mmax[k]) - } } From 64a290c8dba3c7b5b945f228757c4a2f01c941b1 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Mon, 4 May 2015 23:25:07 +0200 Subject: [PATCH 2/7] chunker: Add buffer to New() --- archiver.go | 10 +++++++++- archiver_test.go | 16 +++++++++------- chunker/chunker.go | 15 +++++++++++---- chunker/chunker_test.go | 24 +++++++++++++++++------- 4 files changed, 46 insertions(+), 19 deletions(-) diff --git a/archiver.go b/archiver.go index cc5d079b1..e994502ef 100644 --- a/archiver.go +++ b/archiver.go @@ -170,6 +170,12 @@ func updateNodeContent(node *Node, results []saveResult) error { return nil } +const chunkerBufSize = 512 * chunker.KiB + +var chunkerBufPool = sync.Pool{ + New: func() interface{} { return make([]byte, chunkerBufSize) }, +} + // SaveFile stores the content of the file on the backend as a Blob by calling // Save for each chunk. 
func (arch *Archiver) SaveFile(p *Progress, node *Node) error { @@ -184,7 +190,9 @@ func (arch *Archiver) SaveFile(p *Progress, node *Node) error { return err } - chnker := chunker.New(file, arch.s.Config.ChunkerPolynomial, sha256.New()) + buf := chunkerBufPool.Get().([]byte) + chnker := chunker.New(file, arch.s.Config.ChunkerPolynomial, buf, sha256.New()) + defer chunkerBufPool.Put(buf) resultChannels := [](<-chan saveResult){} for { diff --git a/archiver_test.go b/archiver_test.go index 519f34839..d7ef839eb 100644 --- a/archiver_test.go +++ b/archiver_test.go @@ -18,16 +18,16 @@ import ( var benchArchiveDirectory = flag.String("test.benchdir", ".", "benchmark archiving a real directory (default: .)") var testPol = chunker.Pol(0x3DA3358B4DC173) -const bufSize = chunker.MiB +const chunkerBufSize = 512 * chunker.KiB type Rdr interface { io.ReadSeeker io.ReaderAt } -func benchmarkChunkEncrypt(b testing.TB, buf, buf2 []byte, rd Rdr, key *crypto.Key) { +func benchmarkChunkEncrypt(b testing.TB, buf, buf2, chunkBuf []byte, rd Rdr, key *crypto.Key) { rd.Seek(0, 0) - ch := chunker.New(rd, testPol, sha256.New()) + ch := chunker.New(rd, testPol, chunkBuf, sha256.New()) for { chunk, err := ch.Next() @@ -58,20 +58,21 @@ func BenchmarkChunkEncrypt(b *testing.B) { buf := restic.GetChunkBuf("BenchmarkChunkEncrypt") buf2 := restic.GetChunkBuf("BenchmarkChunkEncrypt") + chunkBuf := make([]byte, chunkerBufSize) b.ResetTimer() b.SetBytes(int64(len(data))) for i := 0; i < b.N; i++ { - benchmarkChunkEncrypt(b, buf, buf2, rd, s.Key()) + benchmarkChunkEncrypt(b, buf, buf2, chunkBuf, rd, s.Key()) } restic.FreeChunkBuf("BenchmarkChunkEncrypt", buf) restic.FreeChunkBuf("BenchmarkChunkEncrypt", buf2) } -func benchmarkChunkEncryptP(b *testing.PB, buf []byte, rd Rdr, key *crypto.Key) { - ch := chunker.New(rd, testPol, sha256.New()) +func benchmarkChunkEncryptP(b *testing.PB, buf, chunkBuf []byte, rd Rdr, key *crypto.Key) { + ch := chunker.New(rd, testPol, chunkBuf, sha256.New()) for { chunk, 
err := ch.Next() @@ -93,6 +94,7 @@ func BenchmarkChunkEncryptParallel(b *testing.B) { data := Random(23, 10<<20) // 10MiB buf := restic.GetChunkBuf("BenchmarkChunkEncryptParallel") + chunkBuf := make([]byte, chunkerBufSize) b.ResetTimer() b.SetBytes(int64(len(data))) @@ -100,7 +102,7 @@ func BenchmarkChunkEncryptParallel(b *testing.B) { b.RunParallel(func(pb *testing.PB) { for pb.Next() { rd := bytes.NewReader(data) - benchmarkChunkEncryptP(pb, buf, rd, s.Key()) + benchmarkChunkEncryptP(pb, buf, chunkBuf, rd, s.Key()) } }) diff --git a/chunker/chunker.go b/chunker/chunker.go index 944321f75..33f5b1504 100644 --- a/chunker/chunker.go +++ b/chunker/chunker.go @@ -82,11 +82,18 @@ type Chunker struct { h hash.Hash } -// New returns a new Chunker based on polynomial p that reads from data from rd -// with bufsize and pass all data to hash along the way. -func New(rd io.Reader, pol Pol, h hash.Hash) *Chunker { +const minBufSize = 32 + +// New returns a new Chunker based on polynomial p that reads from rd +// with bufsize and pass all data to hash along the way, using buf for +// buffering. Buf must at least hold 32 bytes. 
+func New(rd io.Reader, pol Pol, buf []byte, h hash.Hash) *Chunker { + if len(buf) < minBufSize { + buf = make([]byte, minBufSize) + } + c := &Chunker{ - buf: make([]byte, bufSize), + buf: buf, h: h, pol: pol, rd: rd, diff --git a/chunker/chunker_test.go b/chunker/chunker_test.go index fbd692d7d..7799a6db1 100644 --- a/chunker/chunker_test.go +++ b/chunker/chunker_test.go @@ -147,10 +147,13 @@ func getRandom(seed, count int) []byte { return buf } +const chunkerBufSize = 512 * chunker.KiB + func TestChunker(t *testing.T) { // setup data source buf := getRandom(23, 32*1024*1024) - ch := chunker.New(bytes.NewReader(buf), testPol, sha256.New()) + chunkBuf := make([]byte, chunkerBufSize) + ch := chunker.New(bytes.NewReader(buf), testPol, chunkBuf, sha256.New()) chunks := testWithData(t, ch, chunks1) // test reader @@ -177,7 +180,7 @@ func TestChunker(t *testing.T) { // setup nullbyte data source buf = bytes.Repeat([]byte{0}, len(chunks2)*chunker.MinSize) - ch = chunker.New(bytes.NewReader(buf), testPol, sha256.New()) + ch = chunker.New(bytes.NewReader(buf), testPol, chunkBuf, sha256.New()) testWithData(t, ch, chunks2) } @@ -185,6 +188,7 @@ func TestChunker(t *testing.T) { func TestChunkerWithRandomPolynomial(t *testing.T) { // setup data source buf := getRandom(23, 32*1024*1024) + chunkBuf := make([]byte, chunkerBufSize) // generate a new random polynomial start := time.Now() @@ -193,7 +197,7 @@ func TestChunkerWithRandomPolynomial(t *testing.T) { t.Logf("generating random polynomial took %v", time.Since(start)) start = time.Now() - ch := chunker.New(bytes.NewReader(buf), p, sha256.New()) + ch := chunker.New(bytes.NewReader(buf), p, chunkBuf, sha256.New()) t.Logf("creating chunker took %v", time.Since(start)) // make sure that first chunk is different @@ -210,7 +214,9 @@ func TestChunkerWithRandomPolynomial(t *testing.T) { func TestChunkerWithoutHash(t *testing.T) { // setup data source buf := getRandom(23, 32*1024*1024) - ch := chunker.New(bytes.NewReader(buf), 
testPol, nil) + chunkBuf := make([]byte, chunkerBufSize) + + ch := chunker.New(bytes.NewReader(buf), testPol, chunkBuf, nil) chunks := testWithData(t, ch, chunks1) // test reader @@ -240,7 +246,7 @@ func TestChunkerWithoutHash(t *testing.T) { // setup nullbyte data source buf = bytes.Repeat([]byte{0}, len(chunks2)*chunker.MinSize) - ch = chunker.New(bytes.NewReader(buf), testPol, sha256.New()) + ch = chunker.New(bytes.NewReader(buf), testPol, chunkBuf, sha256.New()) testWithData(t, ch, chunks2) } @@ -270,6 +276,8 @@ func benchmarkChunker(b *testing.B, hash hash.Hash) { rd = bytes.NewReader(getRandom(23, size)) } + chunkBuf := make([]byte, chunkerBufSize) + b.ResetTimer() b.SetBytes(int64(size)) @@ -278,7 +286,7 @@ func benchmarkChunker(b *testing.B, hash hash.Hash) { chunks = 0 rd.Seek(0, 0) - ch := chunker.New(rd, testPol, hash) + ch := chunker.New(rd, testPol, chunkBuf, hash) for { _, err := ch.Next() @@ -314,9 +322,11 @@ func BenchmarkNewChunker(b *testing.B) { p, err := chunker.RandomPolynomial() OK(b, err) + chunkBuf := make([]byte, chunkerBufSize) + b.ResetTimer() for i := 0; i < b.N; i++ { - chunker.New(bytes.NewBuffer(nil), p, nil) + chunker.New(bytes.NewBuffer(nil), p, chunkBuf, nil) } } From 589dbaaed25027804c3169e5019a63e903a2314c Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Tue, 5 May 2015 00:06:44 +0200 Subject: [PATCH 3/7] chunker: Remove unused constant --- chunker/chunker.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/chunker/chunker.go b/chunker/chunker.go index 33f5b1504..11d85b811 100644 --- a/chunker/chunker.go +++ b/chunker/chunker.go @@ -17,9 +17,6 @@ const ( // aim to create chunks of 20 bits or about 1MiB on average. averageBits = 20 - // default buffer size - bufSize = 512 * KiB - // MinSize is the minimal size of a chunk. MinSize = 512 * KiB // MaxSize is the maximal size of a chunk. 
From 1f33e29ce23adca93bc7a927de3e4ad6df1ced83 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Tue, 5 May 2015 00:07:57 +0200 Subject: [PATCH 4/7] crypto: Fix buffer extension in Decrypt() --- crypto/crypto.go | 10 +++++----- crypto/crypto_test.go | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/crypto/crypto.go b/crypto/crypto.go index 3e7aedf07..4a1a66bed 100644 --- a/crypto/crypto.go +++ b/crypto/crypto.go @@ -276,11 +276,6 @@ func Decrypt(ks *Key, plaintext []byte, ciphertextWithMac []byte) ([]byte, error panic("trying to decrypt invalid data: ciphertext too small") } - if cap(plaintext) < len(ciphertextWithMac) { - // extend plaintext - plaintext = append(plaintext, make([]byte, len(ciphertextWithMac)-cap(plaintext))...) - } - // extract mac l := len(ciphertextWithMac) - macSize ciphertextWithIV, mac := ciphertextWithMac[:l], ciphertextWithMac[l:] @@ -293,6 +288,11 @@ func Decrypt(ks *Key, plaintext []byte, ciphertextWithMac []byte) ([]byte, error // extract iv iv, ciphertext := ciphertextWithIV[:ivSize], ciphertextWithIV[ivSize:] + if cap(plaintext) < len(ciphertext) { + // extend plaintext + plaintext = append(plaintext, make([]byte, len(ciphertext)-cap(plaintext))...) 
+ } + // decrypt data c, err := aes.NewCipher(ks.Encrypt[:]) if err != nil { diff --git a/crypto/crypto_test.go b/crypto/crypto_test.go index 321461a46..8468b6c0f 100644 --- a/crypto/crypto_test.go +++ b/crypto/crypto_test.go @@ -107,10 +107,10 @@ func TestCornerCases(t *testing.T) { "wrong length returned for ciphertext, expected 0, got %d", len(c)) - // this should decrypt to an empty slice + // this should decrypt to nil p, err := crypto.Decrypt(k, nil, c) OK(t, err) - Equals(t, []byte{}, p) + Equals(t, []byte(nil), p) // test encryption for same slice, this should return an error _, err = crypto.Encrypt(k, c, c) From bdbb3ab329221c5fabf2f073ff4be6025302687b Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Tue, 5 May 2015 00:14:07 +0200 Subject: [PATCH 5/7] Remove pools.go --- archiver_test.go | 11 ++--- cmd/restic/main.go | 4 -- crypto/crypto_test.go | 15 ++---- pools.go | 111 ------------------------------------------ 4 files changed, 8 insertions(+), 133 deletions(-) delete mode 100644 pools.go diff --git a/archiver_test.go b/archiver_test.go index d7ef839eb..8babeed14 100644 --- a/archiver_test.go +++ b/archiver_test.go @@ -56,8 +56,8 @@ func BenchmarkChunkEncrypt(b *testing.B) { s := SetupBackend(b) defer TeardownBackend(b, s) - buf := restic.GetChunkBuf("BenchmarkChunkEncrypt") - buf2 := restic.GetChunkBuf("BenchmarkChunkEncrypt") + buf := make([]byte, chunker.MaxSize) + buf2 := make([]byte, chunker.MaxSize) chunkBuf := make([]byte, chunkerBufSize) b.ResetTimer() @@ -66,9 +66,6 @@ func BenchmarkChunkEncrypt(b *testing.B) { for i := 0; i < b.N; i++ { benchmarkChunkEncrypt(b, buf, buf2, chunkBuf, rd, s.Key()) } - - restic.FreeChunkBuf("BenchmarkChunkEncrypt", buf) - restic.FreeChunkBuf("BenchmarkChunkEncrypt", buf2) } func benchmarkChunkEncryptP(b *testing.PB, buf, chunkBuf []byte, rd Rdr, key *crypto.Key) { @@ -93,7 +90,7 @@ func BenchmarkChunkEncryptParallel(b *testing.B) { data := Random(23, 10<<20) // 10MiB - buf := 
restic.GetChunkBuf("BenchmarkChunkEncryptParallel") + buf := make([]byte, chunker.MaxSize) chunkBuf := make([]byte, chunkerBufSize) b.ResetTimer() @@ -105,8 +102,6 @@ func BenchmarkChunkEncryptParallel(b *testing.B) { benchmarkChunkEncryptP(pb, buf, chunkBuf, rd, s.Key()) } }) - - restic.FreeChunkBuf("BenchmarkChunkEncryptParallel", buf) } func archiveDirectory(b testing.TB) { diff --git a/cmd/restic/main.go b/cmd/restic/main.go index d5d7cd169..8b552ae26 100644 --- a/cmd/restic/main.go +++ b/cmd/restic/main.go @@ -10,7 +10,6 @@ import ( "golang.org/x/crypto/ssh/terminal" "github.com/jessevdk/go-flags" - "github.com/restic/restic" "github.com/restic/restic/backend" "github.com/restic/restic/backend/local" "github.com/restic/restic/backend/sftp" @@ -182,7 +181,4 @@ func main() { if err != nil { os.Exit(1) } - - // this prints some statistics for memory management using the debug package - restic.PoolAlloc() } diff --git a/crypto/crypto_test.go b/crypto/crypto_test.go index 8468b6c0f..faba9970c 100644 --- a/crypto/crypto_test.go +++ b/crypto/crypto_test.go @@ -8,7 +8,6 @@ import ( "os" "testing" - "github.com/restic/restic" "github.com/restic/restic/chunker" "github.com/restic/restic/crypto" . 
"github.com/restic/restic/test" @@ -29,7 +28,9 @@ func TestEncryptDecrypt(t *testing.T) { _, err := io.ReadFull(RandomReader(42, size), data) OK(t, err) - ciphertext, err := crypto.Encrypt(k, restic.GetChunkBuf("TestEncryptDecrypt"), data) + buf := make([]byte, size+crypto.Extension) + + ciphertext, err := crypto.Encrypt(k, buf, data) OK(t, err) Assert(t, len(ciphertext) == len(data)+crypto.Extension, "ciphertext length does not match: want %d, got %d", @@ -41,8 +42,6 @@ func TestEncryptDecrypt(t *testing.T) { "plaintext length does not match: want %d, got %d", len(data), len(plaintext)) - restic.FreeChunkBuf("TestEncryptDecrypt", ciphertext) - Equals(t, plaintext, data) } } @@ -226,8 +225,6 @@ func BenchmarkEncryptDecryptReader(b *testing.B) { _, err = io.Copy(ioutil.Discard, r) OK(b, err) } - - restic.PoolAlloc() } func BenchmarkDecrypt(b *testing.B) { @@ -236,10 +233,8 @@ func BenchmarkDecrypt(b *testing.B) { k := crypto.NewRandomKey() - ciphertext := restic.GetChunkBuf("BenchmarkDecrypt") - defer restic.FreeChunkBuf("BenchmarkDecrypt", ciphertext) - plaintext := restic.GetChunkBuf("BenchmarkDecrypt") - defer restic.FreeChunkBuf("BenchmarkDecrypt", plaintext) + plaintext := make([]byte, size) + ciphertext := make([]byte, size+crypto.Extension) ciphertext, err := crypto.Encrypt(k, ciphertext, data) OK(b, err) diff --git a/pools.go b/pools.go deleted file mode 100644 index 151752062..000000000 --- a/pools.go +++ /dev/null @@ -1,111 +0,0 @@ -package restic - -import ( - "sync" - - "github.com/restic/restic/chunker" - "github.com/restic/restic/crypto" - "github.com/restic/restic/debug" -) - -type poolStats struct { - m sync.Mutex - mget map[string]int - mput map[string]int - mmax map[string]int - - new int - get int - put int - max int -} - -const ( - maxCiphertextSize = crypto.Extension + chunker.MaxSize -) - -func (s *poolStats) Get(k string) { - s.m.Lock() - defer s.m.Unlock() - - s.get++ - cur := s.get - s.put - if cur > s.max { - s.max = cur - } - - if k != "" 
{ - if _, ok := s.mget[k]; !ok { - s.mget[k] = 0 - s.mput[k] = 0 - s.mmax[k] = 0 - } - - s.mget[k]++ - - cur = s.mget[k] - s.mput[k] - if cur > s.mmax[k] { - s.mmax[k] = cur - } - } -} - -func (s *poolStats) Put(k string) { - s.m.Lock() - defer s.m.Unlock() - - s.put++ - - if k != "" { - s.mput[k]++ - } -} - -func newPoolStats() *poolStats { - return &poolStats{ - mget: make(map[string]int), - mput: make(map[string]int), - mmax: make(map[string]int), - } -} - -var ( - chunkPool = sync.Pool{New: newChunkBuf} - - chunkStats = newPoolStats() - nodeStats = newPoolStats() -) - -func newChunkBuf() interface{} { - chunkStats.m.Lock() - defer chunkStats.m.Unlock() - chunkStats.new++ - - // create buffer for iv, data and mac - return make([]byte, maxCiphertextSize) -} - -func GetChunkBuf(s string) []byte { - chunkStats.Get(s) - return chunkPool.Get().([]byte) -} - -func FreeChunkBuf(s string, buf []byte) { - chunkStats.Put(s) - chunkPool.Put(buf) -} - -func PoolAlloc() { - debug.Log("pools.PoolAlloc", "pool stats for chunk: new %d, get %d, put %d, diff %d, max %d\n", - chunkStats.new, chunkStats.get, chunkStats.put, chunkStats.get-chunkStats.put, chunkStats.max) - for k, v := range chunkStats.mget { - debug.Log("pools.PoolAlloc", "pool stats for chunk[%s]: get %d, put %d, diff %d, max %d\n", - k, v, chunkStats.mput[k], v-chunkStats.mput[k], chunkStats.mmax[k]) - } - - debug.Log("pools.PoolAlloc", "pool stats for node: new %d, get %d, put %d, diff %d, max %d\n", - nodeStats.new, nodeStats.get, nodeStats.put, nodeStats.get-nodeStats.put, nodeStats.max) - for k, v := range nodeStats.mget { - debug.Log("pools.PoolAlloc", "pool stats for node[%s]: get %d, put %d, diff %d, max %d\n", k, v, nodeStats.mput[k], v-nodeStats.mput[k], nodeStats.mmax[k]) - } -} From ea22b2dfb1e55c8cd22e4983d147cb83c07cee41 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Tue, 5 May 2015 00:45:29 +0200 Subject: [PATCH 6/7] chunker: move buffer pool to chunker package --- archiver.go | 10 +--------- 
archiver_test.go | 14 ++++++-------- chunker/chunker.go | 21 ++++++++++++--------- chunker/chunker_test.go | 23 +++++++---------------- 4 files changed, 26 insertions(+), 42 deletions(-) diff --git a/archiver.go b/archiver.go index e994502ef..cc5d079b1 100644 --- a/archiver.go +++ b/archiver.go @@ -170,12 +170,6 @@ func updateNodeContent(node *Node, results []saveResult) error { return nil } -const chunkerBufSize = 512 * chunker.KiB - -var chunkerBufPool = sync.Pool{ - New: func() interface{} { return make([]byte, chunkerBufSize) }, -} - // SaveFile stores the content of the file on the backend as a Blob by calling // Save for each chunk. func (arch *Archiver) SaveFile(p *Progress, node *Node) error { @@ -190,9 +184,7 @@ func (arch *Archiver) SaveFile(p *Progress, node *Node) error { return err } - buf := chunkerBufPool.Get().([]byte) - chnker := chunker.New(file, arch.s.Config.ChunkerPolynomial, buf, sha256.New()) - defer chunkerBufPool.Put(buf) + chnker := chunker.New(file, arch.s.Config.ChunkerPolynomial, sha256.New()) resultChannels := [](<-chan saveResult){} for { diff --git a/archiver_test.go b/archiver_test.go index 8babeed14..331da66ce 100644 --- a/archiver_test.go +++ b/archiver_test.go @@ -25,9 +25,9 @@ type Rdr interface { io.ReaderAt } -func benchmarkChunkEncrypt(b testing.TB, buf, buf2, chunkBuf []byte, rd Rdr, key *crypto.Key) { +func benchmarkChunkEncrypt(b testing.TB, buf, buf2 []byte, rd Rdr, key *crypto.Key) { rd.Seek(0, 0) - ch := chunker.New(rd, testPol, chunkBuf, sha256.New()) + ch := chunker.New(rd, testPol, sha256.New()) for { chunk, err := ch.Next() @@ -58,18 +58,17 @@ func BenchmarkChunkEncrypt(b *testing.B) { buf := make([]byte, chunker.MaxSize) buf2 := make([]byte, chunker.MaxSize) - chunkBuf := make([]byte, chunkerBufSize) b.ResetTimer() b.SetBytes(int64(len(data))) for i := 0; i < b.N; i++ { - benchmarkChunkEncrypt(b, buf, buf2, chunkBuf, rd, s.Key()) + benchmarkChunkEncrypt(b, buf, buf2, rd, s.Key()) } } -func benchmarkChunkEncryptP(b 
*testing.PB, buf, chunkBuf []byte, rd Rdr, key *crypto.Key) { - ch := chunker.New(rd, testPol, chunkBuf, sha256.New()) +func benchmarkChunkEncryptP(b *testing.PB, buf []byte, rd Rdr, key *crypto.Key) { + ch := chunker.New(rd, testPol, sha256.New()) for { chunk, err := ch.Next() @@ -91,7 +90,6 @@ func BenchmarkChunkEncryptParallel(b *testing.B) { data := Random(23, 10<<20) // 10MiB buf := make([]byte, chunker.MaxSize) - chunkBuf := make([]byte, chunkerBufSize) b.ResetTimer() b.SetBytes(int64(len(data))) @@ -99,7 +97,7 @@ func BenchmarkChunkEncryptParallel(b *testing.B) { b.RunParallel(func(pb *testing.PB) { for pb.Next() { rd := bytes.NewReader(data) - benchmarkChunkEncryptP(pb, buf, chunkBuf, rd, s.Key()) + benchmarkChunkEncryptP(pb, buf, rd, s.Key()) } }) } diff --git a/chunker/chunker.go b/chunker/chunker.go index 11d85b811..a7a5609b7 100644 --- a/chunker/chunker.go +++ b/chunker/chunker.go @@ -23,8 +23,14 @@ const ( MaxSize = 8 * MiB splitmask = (1 << averageBits) - 1 + + chunkerBufSize = 512 * KiB ) +var bufPool = sync.Pool{ + New: func() interface{} { return make([]byte, chunkerBufSize) }, +} + type tables struct { out [256]Pol mod [256]Pol @@ -79,18 +85,12 @@ type Chunker struct { h hash.Hash } -const minBufSize = 32 - // New returns a new Chunker based on polynomial p that reads from rd // with bufsize and pass all data to hash along the way, using buf for -// buffering. Buf must at least hold 32 bytes. -func New(rd io.Reader, pol Pol, buf []byte, h hash.Hash) *Chunker { - if len(buf) < minBufSize { - buf = make([]byte, minBufSize) - } - +// buffering. 
+func New(rd io.Reader, pol Pol, h hash.Hash) *Chunker { c := &Chunker{ - buf: buf, + buf: bufPool.Get().([]byte), h: h, pol: pol, rd: rd, @@ -204,6 +204,9 @@ func (c *Chunker) Next() (*Chunk, error) { if err == io.EOF && !c.closed { c.closed = true + // return the buffer to the pool + bufPool.Put(c.buf) + // return current chunk, if any bytes have been processed if c.count > 0 { return &Chunk{ diff --git a/chunker/chunker_test.go b/chunker/chunker_test.go index 7799a6db1..5c38d2b0d 100644 --- a/chunker/chunker_test.go +++ b/chunker/chunker_test.go @@ -147,13 +147,10 @@ func getRandom(seed, count int) []byte { return buf } -const chunkerBufSize = 512 * chunker.KiB - func TestChunker(t *testing.T) { // setup data source buf := getRandom(23, 32*1024*1024) - chunkBuf := make([]byte, chunkerBufSize) - ch := chunker.New(bytes.NewReader(buf), testPol, chunkBuf, sha256.New()) + ch := chunker.New(bytes.NewReader(buf), testPol, sha256.New()) chunks := testWithData(t, ch, chunks1) // test reader @@ -180,7 +177,7 @@ func TestChunker(t *testing.T) { // setup nullbyte data source buf = bytes.Repeat([]byte{0}, len(chunks2)*chunker.MinSize) - ch = chunker.New(bytes.NewReader(buf), testPol, chunkBuf, sha256.New()) + ch = chunker.New(bytes.NewReader(buf), testPol, sha256.New()) testWithData(t, ch, chunks2) } @@ -188,7 +185,6 @@ func TestChunker(t *testing.T) { func TestChunkerWithRandomPolynomial(t *testing.T) { // setup data source buf := getRandom(23, 32*1024*1024) - chunkBuf := make([]byte, chunkerBufSize) // generate a new random polynomial start := time.Now() @@ -197,7 +193,7 @@ func TestChunkerWithRandomPolynomial(t *testing.T) { t.Logf("generating random polynomial took %v", time.Since(start)) start = time.Now() - ch := chunker.New(bytes.NewReader(buf), p, chunkBuf, sha256.New()) + ch := chunker.New(bytes.NewReader(buf), p, sha256.New()) t.Logf("creating chunker took %v", time.Since(start)) // make sure that first chunk is different @@ -214,9 +210,8 @@ func 
TestChunkerWithRandomPolynomial(t *testing.T) { func TestChunkerWithoutHash(t *testing.T) { // setup data source buf := getRandom(23, 32*1024*1024) - chunkBuf := make([]byte, chunkerBufSize) - ch := chunker.New(bytes.NewReader(buf), testPol, chunkBuf, nil) + ch := chunker.New(bytes.NewReader(buf), testPol, nil) chunks := testWithData(t, ch, chunks1) // test reader @@ -246,7 +241,7 @@ func TestChunkerWithoutHash(t *testing.T) { // setup nullbyte data source buf = bytes.Repeat([]byte{0}, len(chunks2)*chunker.MinSize) - ch = chunker.New(bytes.NewReader(buf), testPol, chunkBuf, sha256.New()) + ch = chunker.New(bytes.NewReader(buf), testPol, sha256.New()) testWithData(t, ch, chunks2) } @@ -276,8 +271,6 @@ func benchmarkChunker(b *testing.B, hash hash.Hash) { rd = bytes.NewReader(getRandom(23, size)) } - chunkBuf := make([]byte, chunkerBufSize) - b.ResetTimer() b.SetBytes(int64(size)) @@ -286,7 +279,7 @@ func benchmarkChunker(b *testing.B, hash hash.Hash) { chunks = 0 rd.Seek(0, 0) - ch := chunker.New(rd, testPol, chunkBuf, hash) + ch := chunker.New(rd, testPol, hash) for { _, err := ch.Next() @@ -322,11 +315,9 @@ func BenchmarkNewChunker(b *testing.B) { p, err := chunker.RandomPolynomial() OK(b, err) - chunkBuf := make([]byte, chunkerBufSize) - b.ResetTimer() for i := 0; i < b.N; i++ { - chunker.New(bytes.NewBuffer(nil), p, chunkBuf, nil) + chunker.New(bytes.NewBuffer(nil), p, nil) } } From f79e530e18f37ef6a32ff8d5b838a70b09bf44ce Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Tue, 5 May 2015 00:56:07 +0200 Subject: [PATCH 7/7] chunker: Further cleanup --- archiver_test.go | 2 -- chunker/chunker.go | 3 +-- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/archiver_test.go b/archiver_test.go index 331da66ce..f42bdd71a 100644 --- a/archiver_test.go +++ b/archiver_test.go @@ -18,8 +18,6 @@ import ( var benchArchiveDirectory = flag.String("test.benchdir", ".", "benchmark archiving a real directory (default: .)") var testPol = chunker.Pol(0x3DA3358B4DC173) 
-const chunkerBufSize = 512 * chunker.KiB type Rdr interface { io.ReadSeeker io.ReaderAt diff --git a/chunker/chunker.go b/chunker/chunker.go index a7a5609b7..d39bfc71c 100644 --- a/chunker/chunker.go +++ b/chunker/chunker.go @@ -86,8 +86,7 @@ type Chunker struct { } // New returns a new Chunker based on polynomial p that reads from rd -// with bufsize and pass all data to hash along the way, using buf for -// buffering. +// and passes all data to hash along the way. func New(rd io.Reader, pol Pol, h hash.Hash) *Chunker { c := &Chunker{ buf: bufPool.Get().([]byte),