mirror of https://github.com/restic/restic.git
restore: separately restore blobs that are frequently referenced
Writing these blobs to their files can take a long time and consequently cause the backend connection to time out. Avoid that by retrieving these blobs separately.
This commit is contained in:
parent
f0f89d7f27
commit
103beb96bc
|
@ -242,8 +242,33 @@ func (r *fileRestorer) downloadPack(ctx context.Context, pack *packInfo) error {
|
||||||
|
|
||||||
// track already processed blobs for precise error reporting
|
// track already processed blobs for precise error reporting
|
||||||
processedBlobs := restic.NewBlobSet()
|
processedBlobs := restic.NewBlobSet()
|
||||||
err := r.downloadBlobs(ctx, pack.id, blobs, processedBlobs)
|
for _, entry := range blobs {
|
||||||
|
occurrences := 0
|
||||||
|
for _, offsets := range entry.files {
|
||||||
|
occurrences += len(offsets)
|
||||||
|
}
|
||||||
|
// With a maximum blob size of 8MB, the normal blob streaming has to write
|
||||||
|
// at most 800MB for a single blob. This should be short enough to avoid
|
||||||
|
// network connection timeouts. Based on a quick test, a limit of 100 only
|
||||||
|
// selects a very small number of blobs (the number of references per blob
|
||||||
|
// - aka. `count` - seems to follow an exponential distribution)
|
||||||
|
if occurrences > 100 {
|
||||||
|
// process frequently referenced blobs first as these can take a long time to write
|
||||||
|
// which can cause backend connections to time out
|
||||||
|
delete(blobs, entry.blob.ID)
|
||||||
|
partialBlobs := blobToFileOffsetsMapping{entry.blob.ID: entry}
|
||||||
|
err := r.downloadBlobs(ctx, pack.id, partialBlobs, processedBlobs)
|
||||||
|
if err := r.reportError(blobs, processedBlobs, err); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(blobs) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
err := r.downloadBlobs(ctx, pack.id, blobs, processedBlobs)
|
||||||
return r.reportError(blobs, processedBlobs, err)
|
return r.reportError(blobs, processedBlobs, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -247,6 +247,27 @@ func TestFileRestorerPackSkip(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestFileRestorerFrequentBlob restores a single file ("file1") whose blob
// list repeats the same blob 10000 times, and hands it to restoreAndVerify
// once with sparse=false and once with sparse=true.
func TestFileRestorerFrequentBlob(t *testing.T) {
|
||||||
|
tempdir := rtest.TempDir(t)
|
||||||
|
|
||||||
|
// Run the same scenario for both values of the sparse flag.
for _, sparse := range []bool{false, true} {
|
||||||
|
// The blob list starts with one distinct entry ("data1-1").
// NOTE(review): the two positional TestBlob fields look like blob content
// and pack name - confirm against the TestBlob definition.
blobs := []TestBlob{
|
||||||
|
{"data1-1", "pack1-1"},
|
||||||
|
}
|
||||||
|
// Append the identical entry {"a", "pack1-1"} 10000 times.
// Presumably this is meant to exceed the `occurrences > 100` threshold in
// downloadPack so the blob is handled as frequently referenced - verify.
for i := 0; i < 10000; i++ {
|
||||||
|
blobs = append(blobs, TestBlob{"a", "pack1-1"})
|
||||||
|
}
|
||||||
|
// Finish the list with one more distinct entry after the repeated run.
blobs = append(blobs, TestBlob{"end", "pack1-1"})
|
||||||
|
|
||||||
|
restoreAndVerify(t, tempdir, []TestFile{
|
||||||
|
{
|
||||||
|
name: "file1",
|
||||||
|
blobs: blobs,
|
||||||
|
},
|
||||||
|
}, nil, sparse)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestErrorRestoreFiles(t *testing.T) {
|
func TestErrorRestoreFiles(t *testing.T) {
|
||||||
tempdir := rtest.TempDir(t)
|
tempdir := rtest.TempDir(t)
|
||||||
content := []TestFile{
|
content := []TestFile{
|
||||||
|
|
Loading…
Reference in New Issue