From b10acd2af7462e18f63ddd7c93132eb90c9b16ab Mon Sep 17 00:00:00 2001 From: greatroar <@> Date: Fri, 6 Mar 2020 09:17:33 +0100 Subject: [PATCH 1/3] Test and benchmark blob sorting in internal/repository --- internal/repository/repository.go | 14 ++-- .../repository/repository_internal_test.go | 81 +++++++++++++++++++ 2 files changed, 90 insertions(+), 5 deletions(-) create mode 100644 internal/repository/repository_internal_test.go diff --git a/internal/repository/repository.go b/internal/repository/repository.go index 0d5242022..cee36ace8 100644 --- a/internal/repository/repository.go +++ b/internal/repository/repository.go @@ -111,9 +111,13 @@ func (r *Repository) LoadAndDecrypt(ctx context.Context, buf []byte, t restic.Fi return plaintext, nil } -// sortCachedPacks moves all cached pack files to the front of blobs. -func (r *Repository) sortCachedPacks(blobs []restic.PackedBlob) []restic.PackedBlob { - if r.Cache == nil { +type haver interface { + Has(restic.Handle) bool +} + +// sortCachedPacksFirst moves all cached pack files to the front of blobs. +func sortCachedPacksFirst(cache haver, blobs []restic.PackedBlob) []restic.PackedBlob { + if cache == nil { return blobs } @@ -126,7 +130,7 @@ func (r *Repository) sortCachedPacks(blobs []restic.PackedBlob) []restic.PackedB noncached := make([]restic.PackedBlob, 0, len(blobs)/2) for _, blob := range blobs { - if r.Cache.Has(restic.Handle{Type: restic.DataFile, Name: blob.PackID.String()}) { + if cache.Has(restic.Handle{Type: restic.DataFile, Name: blob.PackID.String()}) { cached = append(cached, blob) continue } @@ -149,7 +153,7 @@ func (r *Repository) LoadBlob(ctx context.Context, t restic.BlobType, id restic. } // try cached pack files first - blobs = r.sortCachedPacks(blobs) + blobs = sortCachedPacksFirst(r.Cache, blobs) var lastError error for _, blob := range blobs { diff --git a/internal/repository/repository_internal_test.go b/internal/repository/repository_internal_test.go new file mode 100644 index 000000000..0aeea992a --- /dev/null +++ b/internal/repository/repository_internal_test.go @@ -0,0 +1,81 @@ +package repository + +import ( + "math/rand" + "testing" + + "github.com/restic/restic/internal/restic" + rtest "github.com/restic/restic/internal/test" +) + +type mapcache map[restic.Handle]struct{} + +func (c mapcache) Has(h restic.Handle) bool { + _, ok := c[h] + return ok +} + +func TestSortCachedPacksFirst(t *testing.T) { + var ( + blobs [100]restic.PackedBlob + blobset = make(map[restic.PackedBlob]struct{}) + cache = make(mapcache) + r = rand.New(rand.NewSource(1261)) + ) + + for i := 0; i < len(blobs); i++ { + var id restic.ID + r.Read(id[:]) + blobs[i] = restic.PackedBlob{PackID: id} + blobset[blobs[i]] = struct{}{} + + if i%3 == 0 { + h := restic.Handle{Name: id.String(), Type: restic.DataFile} + cache[h] = struct{}{} + } + } + + sorted := sortCachedPacksFirst(cache, blobs[:]) + + rtest.Equals(t, len(blobs), len(sorted)) + for i := 0; i < len(blobs); i++ { + h := restic.Handle{Type: restic.DataFile, Name: sorted[i].PackID.String()} + if i < len(cache) { + rtest.Assert(t, cache.Has(h), "non-cached blob at front of sorted output") + } else { + rtest.Assert(t, !cache.Has(h), "cached blob at end of sorted output") + } + _, ok := blobset[sorted[i]] + rtest.Assert(t, ok, "sortCachedPacksFirst changed blob id") + } +} + +func BenchmarkSortCachedPacksFirst(b *testing.B) { + const nblobs = 512 // Corresponds to a file of ca. 2GB. + + var ( + blobs [nblobs]restic.PackedBlob + cache = make(mapcache) + r = rand.New(rand.NewSource(1261)) + ) + + for i := 0; i < nblobs; i++ { + var id restic.ID + r.Read(id[:]) + blobs[i] = restic.PackedBlob{PackID: id} + + if i%3 == 0 { + h := restic.Handle{Name: id.String(), Type: restic.DataFile} + cache[h] = struct{}{} + } + } + + var cpy [nblobs]restic.PackedBlob + b.ReportAllocs() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + copy(cpy[:], blobs[:]) + sortCachedPacksFirst(cache, cpy[:]) + } +} From 03d23e6faa2afd526771331083eda0f1d0bb0d3b Mon Sep 17 00:00:00 2001 From: greatroar <@> Date: Fri, 6 Mar 2020 09:18:38 +0100 Subject: [PATCH 2/3] Speed up blob sorting in internal/repository MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit name old time/op new time/op delta SortCachedPacksFirst-8 208µs ± 3% 186µs ± 3% -10.74% (p=0.000 n=10+8) name old alloc/op new alloc/op delta SortCachedPacksFirst-8 213kB ± 0% 139kB ± 0% -34.62% (p=0.000 n=10+10) name old allocs/op new allocs/op delta SortCachedPacksFirst-8 1.03k ± 0% 1.03k ± 0% -0.19% (p=0.000 n=10+10) --- internal/repository/repository.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/internal/repository/repository.go b/internal/repository/repository.go index cee36ace8..47cf4fb0a 100644 --- a/internal/repository/repository.go +++ b/internal/repository/repository.go @@ -116,6 +116,7 @@ type haver interface { } // sortCachedPacksFirst moves all cached pack files to the front of blobs. +// It overwrites blobs. func sortCachedPacksFirst(cache haver, blobs []restic.PackedBlob) []restic.PackedBlob { if cache == nil { return blobs @@ -126,7 +127,7 @@ func sortCachedPacksFirst(cache haver, blobs []restic.PackedBlob) []restic.Packe return blobs } - cached := make([]restic.PackedBlob, 0, len(blobs)/2) + cached := blobs[:0] noncached := make([]restic.PackedBlob, 0, len(blobs)/2) for _, blob := range blobs { From 309598c237079f7b6300d2b39e10738c52be51ce Mon Sep 17 00:00:00 2001 From: greatroar <@> Date: Tue, 10 Mar 2020 15:56:08 +0100 Subject: [PATCH 3/3] Simplify sortCachedPacksFirst test in internal/repository The test now uses the fact that the sort is stable. It's not guaranteed to be, but the test is cleaner and more exhaustive. sortCachedPacksFirst no longer needs a return value. --- internal/repository/repository.go | 11 +++-- .../repository/repository_internal_test.go | 41 ++++++++----------- 2 files changed, 22 insertions(+), 30 deletions(-) diff --git a/internal/repository/repository.go b/internal/repository/repository.go index 47cf4fb0a..f5d07a08b 100644 --- a/internal/repository/repository.go +++ b/internal/repository/repository.go @@ -116,15 +116,14 @@ type haver interface { } // sortCachedPacksFirst moves all cached pack files to the front of blobs. -// It overwrites blobs. -func sortCachedPacksFirst(cache haver, blobs []restic.PackedBlob) []restic.PackedBlob { +func sortCachedPacksFirst(cache haver, blobs []restic.PackedBlob) { if cache == nil { - return blobs + return } // no need to sort a list with one element if len(blobs) == 1 { - return blobs + return } cached := blobs[:0] @@ -138,7 +137,7 @@ func sortCachedPacksFirst(cache haver, blobs []restic.PackedBlob) []restic.Packe noncached = append(noncached, blob) } - return append(cached, noncached...) + copy(blobs[len(cached):], noncached) } // LoadBlob loads a blob of type t from the repository. @@ -154,7 +153,7 @@ func (r *Repository) LoadBlob(ctx context.Context, t restic.BlobType, id restic. } // try cached pack files first - blobs = sortCachedPacksFirst(r.Cache, blobs) + sortCachedPacksFirst(r.Cache, blobs) var lastError error for _, blob := range blobs { diff --git a/internal/repository/repository_internal_test.go b/internal/repository/repository_internal_test.go index 0aeea992a..7bd126b77 100644 --- a/internal/repository/repository_internal_test.go +++ b/internal/repository/repository_internal_test.go @@ -2,52 +2,45 @@ package repository import ( "math/rand" + "sort" "testing" "github.com/restic/restic/internal/restic" rtest "github.com/restic/restic/internal/test" ) -type mapcache map[restic.Handle]struct{} +type mapcache map[restic.Handle]bool -func (c mapcache) Has(h restic.Handle) bool { - _, ok := c[h] - return ok -} +func (c mapcache) Has(h restic.Handle) bool { return c[h] } func TestSortCachedPacksFirst(t *testing.T) { var ( - blobs [100]restic.PackedBlob - blobset = make(map[restic.PackedBlob]struct{}) - cache = make(mapcache) - r = rand.New(rand.NewSource(1261)) + blobs, sorted [100]restic.PackedBlob + + cache = make(mapcache) + r = rand.New(rand.NewSource(1261)) ) for i := 0; i < len(blobs); i++ { var id restic.ID r.Read(id[:]) blobs[i] = restic.PackedBlob{PackID: id} - blobset[blobs[i]] = struct{}{} if i%3 == 0 { h := restic.Handle{Name: id.String(), Type: restic.DataFile} - cache[h] = struct{}{} + cache[h] = true } } - sorted := sortCachedPacksFirst(cache, blobs[:]) + copy(sorted[:], blobs[:]) + sort.SliceStable(sorted[:], func(i, j int) bool { + hi := restic.Handle{Type: restic.DataFile, Name: sorted[i].PackID.String()} + hj := restic.Handle{Type: restic.DataFile, Name: sorted[j].PackID.String()} + return cache.Has(hi) && !cache.Has(hj) + }) - rtest.Equals(t, len(blobs), len(sorted)) - for i := 0; i < len(blobs); i++ { - h := restic.Handle{Type: restic.DataFile, Name: sorted[i].PackID.String()} - if i < len(cache) { - rtest.Assert(t, cache.Has(h), "non-cached blob at front of sorted output") - } else { - rtest.Assert(t, !cache.Has(h), "cached blob at end of sorted output") - } - _, ok := blobset[sorted[i]] - rtest.Assert(t, ok, "sortCachedPacksFirst changed blob id") - } + sortCachedPacksFirst(cache, blobs[:]) + rtest.Equals(t, sorted, blobs) } func BenchmarkSortCachedPacksFirst(b *testing.B) { @@ -66,7 +59,7 @@ func BenchmarkSortCachedPacksFirst(b *testing.B) { if i%3 == 0 { h := restic.Handle{Name: id.String(), Type: restic.DataFile} - cache[h] = struct{}{} + cache[h] = true } }