restore: separately restore blobs that are frequently referenced

Writing these blobs to their files can take a long time and consequently cause the backend connection to time out. Avoid that by retrieving these blobs separately.
2024-01-07 12:17:35 +01:00 · 2024-01-07 12:17:35 +01:00 · 103beb96bc
parent f0f89d7f27
commit 103beb96bc
2 changed files with 47 additions and 1 deletions
--- a/internal/restorer/filerestorer.go
+++ b/internal/restorer/filerestorer.go
@ -242,8 +242,33 @@ func (r *fileRestorer) downloadPack(ctx context.Context, pack *packInfo) error {
 	// track already processed blobs for precise error reporting
 	processedBlobs := restic.NewBlobSet()
-	err := r.downloadBlobs(ctx, pack.id, blobs, processedBlobs)
+	for _, entry := range blobs {
 		occurrences := 0
 		for _, offsets := range entry.files {
 			occurrences += len(offsets)
 		}
 		// With a maximum blob size of 8MB, the normal blob streaming has to write
 		// at most 800MB for a single blob. This should be short enough to avoid
 		// network connection timeouts. Based on a quick test, a limit of 100 only
 		// selects a very small number of blobs (the number of references per blob
 		// - aka. `count` - seems to follow an exponential distribution)
 		if occurrences > 100 {
 			// process frequently referenced blobs first as these can take a long time to write
 			// which can cause backend connections to time out
 			delete(blobs, entry.blob.ID)
 			partialBlobs := blobToFileOffsetsMapping{entry.blob.ID: entry}
 			err := r.downloadBlobs(ctx, pack.id, partialBlobs, processedBlobs)
 			if err := r.reportError(blobs, processedBlobs, err); err != nil {
 				return err
 			}
 		}
 	}
 	if len(blobs) == 0 {
 		return nil
 	}
 	err := r.downloadBlobs(ctx, pack.id, blobs, processedBlobs)
 	return r.reportError(blobs, processedBlobs, err)
 }
--- a/internal/restorer/filerestorer_test.go
+++ b/internal/restorer/filerestorer_test.go
@ -247,6 +247,27 @@ func TestFileRestorerPackSkip(t *testing.T) {
 	}
 }
 // TestFileRestorerFrequentBlob builds a single file "file1" out of one
 // "data1-1" entry, 10000 identical "a" entries and one "end" entry (all
 // tagged "pack1-1") and hands it to restoreAndVerify, once for each value
 // of the sparse flag.
 func TestFileRestorerFrequentBlob(t *testing.T) {
 	// number of times the same blob entry is repeated within the file
 	const repeats = 10000
 	tempdir := rtest.TempDir(t)
 	for _, sparse := range []bool{false, true} {
 		// leading entry + repeated entries + trailing entry
 		content := make([]TestBlob, 0, repeats+2)
 		content = append(content, TestBlob{"data1-1", "pack1-1"})
 		for remaining := repeats; remaining > 0; remaining-- {
 			content = append(content, TestBlob{"a", "pack1-1"})
 		}
 		content = append(content, TestBlob{"end", "pack1-1"})
 		files := []TestFile{{name: "file1", blobs: content}}
 		restoreAndVerify(t, tempdir, files, nil, sparse)
 	}
 }
 func TestErrorRestoreFiles(t *testing.T) {
 	tempdir := rtest.TempDir(t)
 	content := []TestFile{