restore: separately restore blobs that are frequently referenced

Writing these blobs to their files can take a long time and consequently
cause the backend connection to time out. Avoid that by retrieving these
blobs separately.
This commit is contained in:
Michael Eischer 2024-01-07 12:17:35 +01:00
parent f0f89d7f27
commit 103beb96bc
2 changed files with 47 additions and 1 deletions

View File

@ -242,8 +242,33 @@ func (r *fileRestorer) downloadPack(ctx context.Context, pack *packInfo) error {
// track already processed blobs for precise error reporting // track already processed blobs for precise error reporting
processedBlobs := restic.NewBlobSet() processedBlobs := restic.NewBlobSet()
err := r.downloadBlobs(ctx, pack.id, blobs, processedBlobs) for _, entry := range blobs {
occurrences := 0
for _, offsets := range entry.files {
occurrences += len(offsets)
}
// With a maximum blob size of 8MB, the normal blob streaming has to write
// at most 800MB for a single blob. This should be short enough to avoid
// network connection timeouts. Based on a quick test, a limit of 100 only
// selects a very small number of blobs (the number of references per blob
// - aka. `count` - seem to follow a expontential distribution)
if occurrences > 100 {
// process frequently referenced blobs first as these can take a long time to write
// which can cause backend connections to time out
delete(blobs, entry.blob.ID)
partialBlobs := blobToFileOffsetsMapping{entry.blob.ID: entry}
err := r.downloadBlobs(ctx, pack.id, partialBlobs, processedBlobs)
if err := r.reportError(blobs, processedBlobs, err); err != nil {
return err
}
}
}
if len(blobs) == 0 {
return nil
}
err := r.downloadBlobs(ctx, pack.id, blobs, processedBlobs)
return r.reportError(blobs, processedBlobs, err) return r.reportError(blobs, processedBlobs, err)
} }

View File

@ -247,6 +247,27 @@ func TestFileRestorerPackSkip(t *testing.T) {
} }
} }
func TestFileRestorerFrequentBlob(t *testing.T) {
tempdir := rtest.TempDir(t)
for _, sparse := range []bool{false, true} {
blobs := []TestBlob{
{"data1-1", "pack1-1"},
}
for i := 0; i < 10000; i++ {
blobs = append(blobs, TestBlob{"a", "pack1-1"})
}
blobs = append(blobs, TestBlob{"end", "pack1-1"})
restoreAndVerify(t, tempdir, []TestFile{
{
name: "file1",
blobs: blobs,
},
}, nil, sparse)
}
}
func TestErrorRestoreFiles(t *testing.T) { func TestErrorRestoreFiles(t *testing.T) {
tempdir := rtest.TempDir(t) tempdir := rtest.TempDir(t)
content := []TestFile{ content := []TestFile{