Merge pull request #3810 from greatroar/revert-3786

Revert "restic prune: Merge three loops over the index"
This commit is contained in:
MichaelEischer 2022-07-01 23:13:39 +02:00 committed by GitHub
commit 19641bf828
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 59 additions and 42 deletions

View File

@ -242,26 +242,11 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB
Verbosef("searching used packs...\n") Verbosef("searching used packs...\n")
indexPack := make(map[restic.ID]packInfo)
keepBlobs := restic.NewBlobSet() keepBlobs := restic.NewBlobSet()
duplicateBlobs := restic.NewBlobSet()
// iterate over all blobs in index to generate packInfo and find duplicates // iterate over all blobs in index to find out which blobs are duplicates
for blob := range repo.Index().Each(ctx) { for blob := range repo.Index().Each(ctx) {
ip, seen := indexPack[blob.PackID]
if seen {
// mark mixed packs with "Invalid blob type"
if ip.tpe != blob.Type {
ip.tpe = restic.InvalidBlob
}
} else {
ip = packInfo{
tpe: blob.Type,
usedSize: pack.HeaderSize,
}
}
ip.usedSize += uint64(pack.CalculateEntrySize(blob.Blob))
bh := blob.BlobHandle bh := blob.BlobHandle
size := uint64(blob.Length) size := uint64(blob.Length)
switch { switch {
@ -270,27 +255,14 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB
keepBlobs.Insert(bh) keepBlobs.Insert(bh)
stats.size.used += size stats.size.used += size
stats.blobs.used++ stats.blobs.used++
ip.usedSize += size case keepBlobs.Has(bh): // duplicate blob
ip.usedBlobs++ duplicateBlobs.Insert(bh)
case keepBlobs.Has(bh): // duplicate of a blob that we want to keep
stats.size.duplicate += size stats.size.duplicate += size
stats.blobs.duplicate++ stats.blobs.duplicate++
ip.usedSize += size default:
ip.duplicateBlobs++
default: // unused, don't care if it's a duplicate
stats.size.unused += size stats.size.unused += size
stats.blobs.unused++ stats.blobs.unused++
ip.unusedSize += size
ip.unusedBlobs++
} }
if !blob.IsCompressed() {
ip.uncompressed = true
}
// update indexPack
indexPack[blob.PackID] = ip
} }
// Check if all used blobs have been found in index // Check if all used blobs have been found in index
@ -303,6 +275,48 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB
return errorIndexIncomplete return errorIndexIncomplete
} }
indexPack := make(map[restic.ID]packInfo)
// save computed pack header size
for pid, hdrSize := range pack.Size(ctx, repo.Index(), true) {
// initialize tpe with NumBlobTypes to indicate it's not set
indexPack[pid] = packInfo{tpe: restic.NumBlobTypes, usedSize: uint64(hdrSize)}
}
// iterate over all blobs in index to generate packInfo
for blob := range repo.Index().Each(ctx) {
ip := indexPack[blob.PackID]
// Set blob type if not yet set
if ip.tpe == restic.NumBlobTypes {
ip.tpe = blob.Type
}
// mark mixed packs with "Invalid blob type"
if ip.tpe != blob.Type {
ip.tpe = restic.InvalidBlob
}
bh := blob.BlobHandle
size := uint64(blob.Length)
switch {
case duplicateBlobs.Has(bh): // duplicate blob
ip.usedSize += size
ip.duplicateBlobs++
case keepBlobs.Has(bh): // used blob, not duplicate
ip.usedSize += size
ip.usedBlobs++
default: // unused blob
ip.unusedSize += size
ip.unusedBlobs++
}
if !blob.IsCompressed() {
ip.uncompressed = true
}
// update indexPack
indexPack[blob.PackID] = ip
}
Verbosef("collecting packs for deletion and repacking\n") Verbosef("collecting packs for deletion and repacking\n")
removePacksFirst := restic.NewIDSet() removePacksFirst := restic.NewIDSet()
removePacks := restic.NewIDSet() removePacks := restic.NewIDSet()

View File

@ -98,7 +98,7 @@ func rebuildIndex(opts RebuildIndexOptions, gopts GlobalOptions, repo *repositor
if err != nil { if err != nil {
return err return err
} }
packSizeFromIndex = pack.Size(ctx, repo.Index()) packSizeFromIndex = pack.Size(ctx, repo.Index(), false)
} }
Verbosef("getting pack files to read...\n") Verbosef("getting pack files to read...\n")

View File

@ -131,7 +131,7 @@ func (c *Checker) LoadIndex(ctx context.Context) (hints []error, errs []error) {
} }
// compute pack size using index entries // compute pack size using index entries
c.packs = pack.Size(ctx, c.masterIndex) c.packs = pack.Size(ctx, c.masterIndex, false)
debug.Log("checking for duplicate packs") debug.Log("checking for duplicate packs")
for packID := range c.packs { for packID := range c.packs {

View File

@ -177,8 +177,8 @@ var (
const ( const (
// size of the header-length field at the end of the file; it is a uint32 // size of the header-length field at the end of the file; it is a uint32
headerLengthSize = 4 headerLengthSize = 4
// HeaderSize is the header's constant overhead (independent of #entries) // headerSize is the header's constant overhead (independent of #entries)
HeaderSize = headerLengthSize + crypto.Extension headerSize = headerLengthSize + crypto.Extension
// MaxHeaderSize is the max size of header including header-length field // MaxHeaderSize is the max size of header including header-length field
MaxHeaderSize = 16*1024*1024 + headerLengthSize MaxHeaderSize = 16*1024*1024 + headerLengthSize
@ -242,7 +242,7 @@ func readHeader(rd io.ReaderAt, size int64) ([]byte, error) {
// eagerly download eagerEntries header entries as part of header-length request. // eagerly download eagerEntries header entries as part of header-length request.
// only make second request if actual number of entries is greater than eagerEntries // only make second request if actual number of entries is greater than eagerEntries
eagerSize := eagerEntries*int(entrySize) + HeaderSize eagerSize := eagerEntries*int(entrySize) + headerSize
b, c, err := readRecords(rd, size, eagerSize) b, c, err := readRecords(rd, size, eagerSize)
if err != nil { if err != nil {
return nil, err return nil, err
@ -349,7 +349,7 @@ func CalculateEntrySize(blob restic.Blob) int {
} }
func CalculateHeaderSize(blobs []restic.Blob) int { func CalculateHeaderSize(blobs []restic.Blob) int {
size := HeaderSize size := headerSize
for _, blob := range blobs { for _, blob := range blobs {
size += CalculateEntrySize(blob) size += CalculateEntrySize(blob)
} }
@ -357,17 +357,20 @@ func CalculateHeaderSize(blobs []restic.Blob) int {
} }
// Size returns the size of all packs computed by index information. // Size returns the size of all packs computed by index information.
// If onlyHdr is set to true, only the size of the header is returned
// Note that this function only gives correct sizes, if there are no // Note that this function only gives correct sizes, if there are no
// duplicates in the index. // duplicates in the index.
func Size(ctx context.Context, mi restic.MasterIndex) map[restic.ID]int64 { func Size(ctx context.Context, mi restic.MasterIndex, onlyHdr bool) map[restic.ID]int64 {
packSize := make(map[restic.ID]int64) packSize := make(map[restic.ID]int64)
for blob := range mi.Each(ctx) { for blob := range mi.Each(ctx) {
size, ok := packSize[blob.PackID] size, ok := packSize[blob.PackID]
if !ok { if !ok {
size = HeaderSize size = headerSize
} }
if !onlyHdr {
size += int64(blob.Length) size += int64(blob.Length)
}
packSize[blob.PackID] = size + int64(CalculateEntrySize(blob.Blob)) packSize[blob.PackID] = size + int64(CalculateEntrySize(blob.Blob))
} }