restic/internal/repository/prune.go

package repository

import (
	"context"
	"fmt"
	"math"
	"sort"

	"github.com/restic/restic/internal/errors"
	"github.com/restic/restic/internal/index"
	"github.com/restic/restic/internal/pack"
	"github.com/restic/restic/internal/restic"
	"github.com/restic/restic/internal/ui/progress"
)

var ErrIndexIncomplete = errors.Fatal("index is not complete")
var ErrPacksMissing = errors.Fatal("packs from index missing in repo")
var ErrSizeNotMatching = errors.Fatal("pack size does not match calculated size from index")

// PruneOptions collects all options for the cleanup command.
type PruneOptions struct {
	DryRun         bool
	UnsafeRecovery bool

	MaxUnusedBytes func(used uint64) (unused uint64) // calculates the number of unused bytes after repacking, according to MaxUnused
	MaxRepackBytes uint64

	RepackCachableOnly bool
	RepackSmall        bool
	RepackUncompressed bool
}

type PruneStats struct {
	Blobs struct {
		Used      uint
		Duplicate uint
		Unused    uint
		Remove    uint
		Repack    uint
		Repackrm  uint
	}
	Size struct {
		Used         uint64
		Duplicate    uint64
		Unused       uint64
		Remove       uint64
		Repack       uint64
		Repackrm     uint64
		Unref        uint64
		Uncompressed uint64
	}
	Packs struct {
		Used       uint
		Unused     uint
		PartlyUsed uint
		Unref      uint
		Keep       uint
		Repack     uint
		Remove     uint
	}
}

type PrunePlan struct {
	removePacksFirst restic.IDSet          // packs to remove first (unreferenced packs)
	repackPacks      restic.IDSet          // packs to repack
	keepBlobs        restic.CountedBlobSet // blobs to keep during repacking
	removePacks      restic.IDSet          // packs to remove
	ignorePacks      restic.IDSet          // packs to ignore when rebuilding the index
}

type packInfo struct {
	usedBlobs    uint
	unusedBlobs  uint
	usedSize     uint64
	unusedSize   uint64
	tpe          restic.BlobType
	uncompressed bool
}

type packInfoWithID struct {
	ID restic.ID
	packInfo
	mustCompress bool
}

// PlanPrune selects which files to rewrite and which to delete and which blobs to keep.
// Also some summary statistics are returned.
func PlanPrune(ctx context.Context, opts PruneOptions, repo restic.Repository, getUsedBlobs func(ctx context.Context, repo restic.Repository) (usedBlobs restic.CountedBlobSet, err error), printer progress.Printer) (PrunePlan, PruneStats, error) {
	var stats PruneStats

	if opts.UnsafeRecovery {
		// prevent repacking data to make sure users cannot get stuck.
		opts.MaxRepackBytes = 0
	}
	if repo.Connections() < 2 {
		return PrunePlan{}, stats, fmt.Errorf("prune requires a backend connection limit of at least two")
	}
	if repo.Config().Version < 2 && opts.RepackUncompressed {
		return PrunePlan{}, stats, fmt.Errorf("compression requires at least repository format version 2")
	}

	usedBlobs, err := getUsedBlobs(ctx, repo)
	if err != nil {
		return PrunePlan{}, stats, err
	}

	printer.P("searching used packs...\n")
	keepBlobs, indexPack, err := packInfoFromIndex(ctx, repo.Index(), usedBlobs, &stats, printer)
	if err != nil {
		return PrunePlan{}, stats, err
	}

	printer.P("collecting packs for deletion and repacking\n")
	plan, err := decidePackAction(ctx, opts, repo, indexPack, &stats, printer)
	if err != nil {
		return PrunePlan{}, stats, err
	}

	if len(plan.repackPacks) != 0 {
		blobCount := keepBlobs.Len()
		// when repacking, we do not want to keep blobs which are
		// already contained in kept packs, so delete them from keepBlobs
		repo.Index().Each(ctx, func(blob restic.PackedBlob) {
			if plan.removePacks.Has(blob.PackID) || plan.repackPacks.Has(blob.PackID) {
				return
			}
			keepBlobs.Delete(blob.BlobHandle)
		})

		if keepBlobs.Len() < blobCount/2 {
			// replace with copy to shrink map to necessary size if there's a chance to benefit
			keepBlobs = keepBlobs.Copy()
		}
	} else {
		// keepBlobs is only needed if packs are repacked
		keepBlobs = nil
	}
	plan.keepBlobs = keepBlobs

	return plan, stats, nil
}

func packInfoFromIndex(ctx context.Context, idx restic.MasterIndex, usedBlobs restic.CountedBlobSet, stats *PruneStats, printer progress.Printer) (restic.CountedBlobSet, map[restic.ID]packInfo, error) {
	// iterate over all blobs in index to find out which blobs are duplicates
	// The counter in usedBlobs describes how many instances of the blob exist in the repository index
	// Thus 0 == blob is missing, 1 == blob exists once, >= 2 == duplicates exist
	idx.Each(ctx, func(blob restic.PackedBlob) {
		bh := blob.BlobHandle
		count, ok := usedBlobs[bh]
		if ok {
			if count < math.MaxUint8 {
				// don't overflow, but saturate count at 255
				// this can lead to a non-optimal pack selection, but won't cause
				// problems otherwise
				count++
			}

			usedBlobs[bh] = count
		}
	})

	// Check if all used blobs have been found in index
	missingBlobs := restic.NewBlobSet()
	for bh, count := range usedBlobs {
		if count == 0 {
			// blob does not exist in any pack files
			missingBlobs.Insert(bh)
		}
	}

	if len(missingBlobs) != 0 {
		printer.E("%v not found in the index\n\n"+
			"Integrity check failed: Data seems to be missing.\n"+
			"Will not start prune to prevent (additional) data loss!\n"+
			"Please report this error (along with the output of the 'prune' run) at\n"+
			"https://github.com/restic/restic/issues/new/choose\n", missingBlobs)
		return nil, nil, ErrIndexIncomplete
	}

	indexPack := make(map[restic.ID]packInfo)

	// save computed pack header size
	for pid, hdrSize := range pack.Size(ctx, idx, true) {
		// initialize tpe with NumBlobTypes to indicate it's not set
		indexPack[pid] = packInfo{tpe: restic.NumBlobTypes, usedSize: uint64(hdrSize)}
	}

	hasDuplicates := false
	// iterate over all blobs in index to generate packInfo
	idx.Each(ctx, func(blob restic.PackedBlob) {
		ip := indexPack[blob.PackID]

		// Set blob type if not yet set
		if ip.tpe == restic.NumBlobTypes {
			ip.tpe = blob.Type
		}

		// mark mixed packs with "Invalid blob type"
		if ip.tpe != blob.Type {
			ip.tpe = restic.InvalidBlob
		}

		bh := blob.BlobHandle
		size := uint64(blob.Length)
		dupCount := usedBlobs[bh]
		switch {
		case dupCount >= 2:
			hasDuplicates = true
			// mark as unused for now, we will later on select one copy
			ip.unusedSize += size
			ip.unusedBlobs++

			// count as duplicate, will later on change one copy to be counted as used
			stats.Size.Duplicate += size
			stats.Blobs.Duplicate++
		case dupCount == 1: // used blob, not duplicate
			ip.usedSize += size
			ip.usedBlobs++

			stats.Size.Used += size
			stats.Blobs.Used++
		default: // unused blob
			ip.unusedSize += size
			ip.unusedBlobs++

			stats.Size.Unused += size
			stats.Blobs.Unused++
		}
		if !blob.IsCompressed() {
			ip.uncompressed = true
		}
		// update indexPack
		indexPack[blob.PackID] = ip
	})

	// if duplicate blobs exist, those will be set to either "used" or "unused":
	// - mark only one occurrence of duplicate blobs as used
	// - if there are already some used blobs in a pack, possibly mark duplicates in this pack as "used"
	// - if there are no used blobs in a pack, possibly mark duplicates as "unused"
	if hasDuplicates {
		// iterate again over all blobs in index (this is pretty cheap, all in-mem)
		idx.Each(ctx, func(blob restic.PackedBlob) {
			bh := blob.BlobHandle
			count, ok := usedBlobs[bh]
			// skip non-duplicate, aka. normal blobs
			// count == 0 is used to mark that this was a duplicate blob with only a single occurrence remaining
			if !ok || count == 1 {
				return
			}

			ip := indexPack[blob.PackID]
			size := uint64(blob.Length)
			switch {
			case ip.usedBlobs > 0, count == 0:
				// other used blobs in pack or "last" occurrence ->  transition to used
				ip.usedSize += size
				ip.usedBlobs++
				ip.unusedSize -= size
				ip.unusedBlobs--
				// same for the global statistics
				stats.Size.Used += size
				stats.Blobs.Used++
				stats.Size.Duplicate -= size
				stats.Blobs.Duplicate--
				// let other occurrences remain marked as unused
				usedBlobs[bh] = 1
			default:
				// remain unused and decrease counter
				count--
				if count == 1 {
					// setting count to 1 would lead to forgetting that this blob had duplicates
					// thus use the special value zero. This will select the last instance of the blob for keeping.
					count = 0
				}
				usedBlobs[bh] = count
			}
			// update indexPack
			indexPack[blob.PackID] = ip
		})
	}

	// Sanity check. If no duplicates exist, all blobs have value 1. After handling
	// duplicates, this also applies to duplicates.
	for _, count := range usedBlobs {
		if count != 1 {
			panic("internal error during blob selection")
		}
	}

	return usedBlobs, indexPack, nil
}

func decidePackAction(ctx context.Context, opts PruneOptions, repo restic.Repository, indexPack map[restic.ID]packInfo, stats *PruneStats, printer progress.Printer) (PrunePlan, error) {
	removePacksFirst := restic.NewIDSet()
	removePacks := restic.NewIDSet()
	repackPacks := restic.NewIDSet()

	var repackCandidates []packInfoWithID
	var repackSmallCandidates []packInfoWithID
	repoVersion := repo.Config().Version
	// only repack very small files by default
	targetPackSize := repo.PackSize() / 25
	if opts.RepackSmall {
		// consider files with at least 80% of the target size as large enough
		targetPackSize = repo.PackSize() / 5 * 4
	}

	// loop over all packs and decide what to do
	bar := printer.NewCounter("packs processed")
	bar.SetMax(uint64(len(indexPack)))
	err := repo.List(ctx, restic.PackFile, func(id restic.ID, packSize int64) error {
		p, ok := indexPack[id]
		if !ok {
			// Pack was not referenced in index and is not used  => immediately remove!
			printer.V("will remove pack %v as it is unused and not indexed\n", id.Str())
			removePacksFirst.Insert(id)
			stats.Size.Unref += uint64(packSize)
			return nil
		}

		if p.unusedSize+p.usedSize != uint64(packSize) && p.usedBlobs != 0 {
			// Pack size does not fit and pack is needed => error
			// If the pack is not needed, this is no error, the pack can
			// and will be simply removed, see below.
			printer.E("pack %s: calculated size %d does not match real size %d\nRun 'restic repair index'.\n",
				id.Str(), p.unusedSize+p.usedSize, packSize)
			return ErrSizeNotMatching
		}

		// statistics
		switch {
		case p.usedBlobs == 0:
			stats.Packs.Unused++
		case p.unusedBlobs == 0:
			stats.Packs.Used++
		default:
			stats.Packs.PartlyUsed++
		}

		if p.uncompressed {
			stats.Size.Uncompressed += p.unusedSize + p.usedSize
		}
		mustCompress := false
		if repoVersion >= 2 {
			// repo v2: always repack tree blobs if uncompressed
			// compress data blobs if requested
			mustCompress = (p.tpe == restic.TreeBlob || opts.RepackUncompressed) && p.uncompressed
		}

		// decide what to do
		switch {
		case p.usedBlobs == 0:
			// All blobs in pack are no longer used => remove pack!
			removePacks.Insert(id)
			stats.Blobs.Remove += p.unusedBlobs
			stats.Size.Remove += p.unusedSize

		case opts.RepackCachableOnly && p.tpe == restic.DataBlob:
			// if this is a data pack and --repack-cacheable-only is set => keep pack!
			stats.Packs.Keep++

		case p.unusedBlobs == 0 && p.tpe != restic.InvalidBlob && !mustCompress:
			if packSize >= int64(targetPackSize) {
				// All blobs in pack are used and not mixed => keep pack!
				stats.Packs.Keep++
			} else {
				repackSmallCandidates = append(repackSmallCandidates, packInfoWithID{ID: id, packInfo: p, mustCompress: mustCompress})
			}

		default:
			// all other packs are candidates for repacking
			repackCandidates = append(repackCandidates, packInfoWithID{ID: id, packInfo: p, mustCompress: mustCompress})
		}

		delete(indexPack, id)
		bar.Add(1)
		return nil
	})
	bar.Done()
	if err != nil {
		return PrunePlan{}, err
	}

	// At this point indexPacks contains only missing packs!

	// missing packs that are not needed can be ignored
	ignorePacks := restic.NewIDSet()
	for id, p := range indexPack {
		if p.usedBlobs == 0 {
			ignorePacks.Insert(id)
			stats.Blobs.Remove += p.unusedBlobs
			stats.Size.Remove += p.unusedSize
			delete(indexPack, id)
		}
	}

	if len(indexPack) != 0 {
		printer.E("The index references %d needed pack files which are missing from the repository:\n", len(indexPack))
		for id := range indexPack {
			printer.E("  %v\n", id)
		}
		return PrunePlan{}, ErrPacksMissing
	}
	if len(ignorePacks) != 0 {
		printer.E("Missing but unneeded pack files are referenced in the index, will be repaired\n")
		for id := range ignorePacks {
			printer.E("will forget missing pack file %v\n", id)
		}
	}

	if len(repackSmallCandidates) < 10 {
		// too few small files to be worth the trouble, this also prevents endlessly repacking
		// if there is just a single pack file below the target size
		stats.Packs.Keep += uint(len(repackSmallCandidates))
	} else {
		repackCandidates = append(repackCandidates, repackSmallCandidates...)
	}

	// Sort repackCandidates such that packs with highest ratio unused/used space are picked first.
	// This is equivalent to sorting by unused / total space.
	// Instead of unused[i] / used[i] > unused[j] / used[j] we use
	// unused[i] * used[j] > unused[j] * used[i] as uint32*uint32 < uint64
	// Moreover packs containing trees and too small packs are sorted to the beginning
	sort.Slice(repackCandidates, func(i, j int) bool {
		pi := repackCandidates[i].packInfo
		pj := repackCandidates[j].packInfo
		switch {
		case pi.tpe != restic.DataBlob && pj.tpe == restic.DataBlob:
			return true
		case pj.tpe != restic.DataBlob && pi.tpe == restic.DataBlob:
			return false
		case pi.unusedSize+pi.usedSize < uint64(targetPackSize) && pj.unusedSize+pj.usedSize >= uint64(targetPackSize):
			return true
		case pj.unusedSize+pj.usedSize < uint64(targetPackSize) && pi.unusedSize+pi.usedSize >= uint64(targetPackSize):
			return false
		}
		return pi.unusedSize*pj.usedSize > pj.unusedSize*pi.usedSize
	})

	repack := func(id restic.ID, p packInfo) {
		repackPacks.Insert(id)
		stats.Blobs.Repack += p.unusedBlobs + p.usedBlobs
		stats.Size.Repack += p.unusedSize + p.usedSize
		stats.Blobs.Repackrm += p.unusedBlobs
		stats.Size.Repackrm += p.unusedSize
		if p.uncompressed {
			stats.Size.Uncompressed -= p.unusedSize + p.usedSize
		}
	}

	// calculate limit for number of unused bytes in the repo after repacking
	maxUnusedSizeAfter := opts.MaxUnusedBytes(stats.Size.Used)

	for _, p := range repackCandidates {
		reachedUnusedSizeAfter := (stats.Size.Unused-stats.Size.Remove-stats.Size.Repackrm < maxUnusedSizeAfter)
		reachedRepackSize := stats.Size.Repack+p.unusedSize+p.usedSize >= opts.MaxRepackBytes
		packIsLargeEnough := p.unusedSize+p.usedSize >= uint64(targetPackSize)

		switch {
		case reachedRepackSize:
			stats.Packs.Keep++

		case p.tpe != restic.DataBlob, p.mustCompress:
			// repacking non-data packs / uncompressed-trees is only limited by repackSize
			repack(p.ID, p.packInfo)

		case reachedUnusedSizeAfter && packIsLargeEnough:
			// for all other packs stop repacking if tolerated unused size is reached.
			stats.Packs.Keep++

		default:
			repack(p.ID, p.packInfo)
		}
	}

	stats.Packs.Unref = uint(len(removePacksFirst))
	stats.Packs.Repack = uint(len(repackPacks))
	stats.Packs.Remove = uint(len(removePacks))

	if repo.Config().Version < 2 {
		// compression not supported for repository format version 1
		stats.Size.Uncompressed = 0
	}

	return PrunePlan{removePacksFirst: removePacksFirst,
		removePacks: removePacks,
		repackPacks: repackPacks,
		ignorePacks: ignorePacks,
	}, nil
}

// DoPrune does the actual pruning:
// - remove unreferenced packs first
// - repack given pack files while keeping the given blobs
// - rebuild the index while ignoring all files that will be deleted
// - delete the files
// plan.removePacks and plan.ignorePacks are modified in this function.
func DoPrune(ctx context.Context, opts PruneOptions, repo restic.Repository, plan PrunePlan, printer progress.Printer) (err error) {
	if opts.DryRun {
		printer.V("Repeated prune dry-runs can report slightly different amounts of data to keep or repack. This is expected behavior.\n\n")
		if len(plan.removePacksFirst) > 0 {
			printer.V("Would have removed the following unreferenced packs:\n%v\n\n", plan.removePacksFirst)
		}
		printer.V("Would have repacked and removed the following packs:\n%v\n\n", plan.repackPacks)
		printer.V("Would have removed the following no longer used packs:\n%v\n\n", plan.removePacks)
		// Always quit here if DryRun was set!
		return nil
	}

	// unreferenced packs can be safely deleted first
	if len(plan.removePacksFirst) != 0 {
		printer.P("deleting unreferenced packs\n")
		_ = deleteFiles(ctx, true, repo, plan.removePacksFirst, restic.PackFile, printer)
	}

	if len(plan.repackPacks) != 0 {
		printer.P("repacking packs\n")
		bar := printer.NewCounter("packs repacked")
		bar.SetMax(uint64(len(plan.repackPacks)))
		_, err := Repack(ctx, repo, repo, plan.repackPacks, plan.keepBlobs, bar)
		bar.Done()
		if err != nil {
			return errors.Fatal(err.Error())
		}

		// Also remove repacked packs
		plan.removePacks.Merge(plan.repackPacks)

		if len(plan.keepBlobs) != 0 {
			printer.E("%v was not repacked\n\n"+
				"Integrity check failed.\n"+
				"Please report this error (along with the output of the 'prune' run) at\n"+
				"https://github.com/restic/restic/issues/new/choose\n", plan.keepBlobs)
			return errors.Fatal("internal error: blobs were not repacked")
		}

		// allow GC of the blob set
		plan.keepBlobs = nil
	}

	if len(plan.ignorePacks) == 0 {
		plan.ignorePacks = plan.removePacks
	} else {
		plan.ignorePacks.Merge(plan.removePacks)
	}

	if opts.UnsafeRecovery {
		printer.P("deleting index files\n")
		indexFiles := repo.Index().(*index.MasterIndex).IDs()
		err = deleteFiles(ctx, false, repo, indexFiles, restic.IndexFile, printer)
		if err != nil {
			return errors.Fatalf("%s", err)
		}
	} else if len(plan.ignorePacks) != 0 {
		err = rebuildIndexFiles(ctx, repo, plan.ignorePacks, nil, false, printer)
		if err != nil {
			return errors.Fatalf("%s", err)
		}
	}

	if len(plan.removePacks) != 0 {
		printer.P("removing %d old packs\n", len(plan.removePacks))
		_ = deleteFiles(ctx, true, repo, plan.removePacks, restic.PackFile, printer)
	}

	if opts.UnsafeRecovery {
		err = rebuildIndexFiles(ctx, repo, plan.ignorePacks, nil, true, printer)
		if err != nil {
			return errors.Fatalf("%s", err)
		}
	}

	printer.P("done\n")
	return nil
}

// deleteFiles deletes the given fileList of fileType in parallel
// if ignoreError=true, it will print a warning if there was an error, else it will abort.
func deleteFiles(ctx context.Context, ignoreError bool, repo restic.Repository, fileList restic.IDSet, fileType restic.FileType, printer progress.Printer) error {
	bar := printer.NewCounter("files deleted")
	defer bar.Done()

	return restic.ParallelRemove(ctx, repo, fileList, fileType, func(id restic.ID, err error) error {
		if err != nil {
			printer.E("unable to remove %v/%v from the repository\n", fileType, id)
			if !ignoreError {
				return err
			}
		}
		printer.VV("removed %v/%v\n", fileType, id)
		return nil
	}, bar)
}
prune: move logic into repository package 2024-04-06 17:21:21 +00:00			`package repository`

			`import (`
			`"context"`
prune: move additional option checks to repository 2024-04-06 17:49:03 +00:00			`"fmt"`
prune: move logic into repository package 2024-04-06 17:21:21 +00:00			`"math"`
			`"sort"`

			`"github.com/restic/restic/internal/errors"`
			`"github.com/restic/restic/internal/index"`
			`"github.com/restic/restic/internal/pack"`
			`"github.com/restic/restic/internal/restic"`
			`"github.com/restic/restic/internal/ui/progress"`
			`)`

			`var ErrIndexIncomplete = errors.Fatal("index is not complete")`
			`var ErrPacksMissing = errors.Fatal("packs from index missing in repo")`
			`var ErrSizeNotMatching = errors.Fatal("pack size does not match calculated size from index")`

			`// PruneOptions collects all options for the cleanup command.`
			`type PruneOptions struct {`
			`DryRun bool`
			`UnsafeRecovery bool`

			`MaxUnusedBytes func(used uint64) (unused uint64) // calculates the number of unused bytes after repacking, according to MaxUnused`
			`MaxRepackBytes uint64`

			`RepackCachableOnly bool`
			`RepackSmall bool`
			`RepackUncompressed bool`
			`}`

			`type PruneStats struct {`
			`Blobs struct {`
			`Used uint`
			`Duplicate uint`
			`Unused uint`
			`Remove uint`
			`Repack uint`
			`Repackrm uint`
			`}`
			`Size struct {`
			`Used uint64`
			`Duplicate uint64`
			`Unused uint64`
			`Remove uint64`
			`Repack uint64`
			`Repackrm uint64`
			`Unref uint64`
			`Uncompressed uint64`
			`}`
			`Packs struct {`
			`Used uint`
			`Unused uint`
			`PartlyUsed uint`
			`Unref uint`
			`Keep uint`
			`Repack uint`
			`Remove uint`
			`}`
			`}`

			`type PrunePlan struct {`
			`removePacksFirst restic.IDSet // packs to remove first (unreferenced packs)`
			`repackPacks restic.IDSet // packs to repack`
			`keepBlobs restic.CountedBlobSet // blobs to keep during repacking`
			`removePacks restic.IDSet // packs to remove`
			`ignorePacks restic.IDSet // packs to ignore when rebuilding the index`
			`}`

			`type packInfo struct {`
			`usedBlobs uint`
			`unusedBlobs uint`
			`usedSize uint64`
			`unusedSize uint64`
			`tpe restic.BlobType`
			`uncompressed bool`
			`}`

			`type packInfoWithID struct {`
			`ID restic.ID`
			`packInfo`
			`mustCompress bool`
			`}`

			`// PlanPrune selects which files to rewrite and which to delete and which blobs to keep.`
			`// Also some summary statistics are returned.`
			`func PlanPrune(ctx context.Context, opts PruneOptions, repo restic.Repository, getUsedBlobs func(ctx context.Context, repo restic.Repository) (usedBlobs restic.CountedBlobSet, err error), printer progress.Printer) (PrunePlan, PruneStats, error) {`
			`var stats PruneStats`

prune: move additional option checks to repository 2024-04-06 17:49:03 +00:00			`if opts.UnsafeRecovery {`
			`// prevent repacking data to make sure users cannot get stuck.`
			`opts.MaxRepackBytes = 0`
			`}`
			`if repo.Connections() < 2 {`
			`return PrunePlan{}, stats, fmt.Errorf("prune requires a backend connection limit of at least two")`
			`}`
			`if repo.Config().Version < 2 && opts.RepackUncompressed {`
			`return PrunePlan{}, stats, fmt.Errorf("compression requires at least repository format version 2")`
			`}`

prune: move logic into repository package 2024-04-06 17:21:21 +00:00			`usedBlobs, err := getUsedBlobs(ctx, repo)`
			`if err != nil {`
			`return PrunePlan{}, stats, err`
			`}`

			`printer.P("searching used packs...\n")`
			`keepBlobs, indexPack, err := packInfoFromIndex(ctx, repo.Index(), usedBlobs, &stats, printer)`
			`if err != nil {`
			`return PrunePlan{}, stats, err`
			`}`

			`printer.P("collecting packs for deletion and repacking\n")`
			`plan, err := decidePackAction(ctx, opts, repo, indexPack, &stats, printer)`
			`if err != nil {`
			`return PrunePlan{}, stats, err`
			`}`

			`if len(plan.repackPacks) != 0 {`
			`blobCount := keepBlobs.Len()`
			`// when repacking, we do not want to keep blobs which are`
			`// already contained in kept packs, so delete them from keepBlobs`
			`repo.Index().Each(ctx, func(blob restic.PackedBlob) {`
			`if plan.removePacks.Has(blob.PackID) \|\| plan.repackPacks.Has(blob.PackID) {`
			`return`
			`}`
			`keepBlobs.Delete(blob.BlobHandle)`
			`})`

			`if keepBlobs.Len() < blobCount/2 {`
			`// replace with copy to shrink map to necessary size if there's a chance to benefit`
			`keepBlobs = keepBlobs.Copy()`
			`}`
			`} else {`
			`// keepBlobs is only needed if packs are repacked`
			`keepBlobs = nil`
			`}`
			`plan.keepBlobs = keepBlobs`

			`return plan, stats, nil`
			`}`

			`func packInfoFromIndex(ctx context.Context, idx restic.MasterIndex, usedBlobs restic.CountedBlobSet, stats *PruneStats, printer progress.Printer) (restic.CountedBlobSet, map[restic.ID]packInfo, error) {`
			`// iterate over all blobs in index to find out which blobs are duplicates`
			`// The counter in usedBlobs describes how many instances of the blob exist in the repository index`
			`// Thus 0 == blob is missing, 1 == blob exists once, >= 2 == duplicates exist`
			`idx.Each(ctx, func(blob restic.PackedBlob) {`
			`bh := blob.BlobHandle`
			`count, ok := usedBlobs[bh]`
			`if ok {`
			`if count < math.MaxUint8 {`
			`// don't overflow, but saturate count at 255`
			`// this can lead to a non-optimal pack selection, but won't cause`
			`// problems otherwise`
			`count++`
			`}`

			`usedBlobs[bh] = count`
			`}`
			`})`

			`// Check if all used blobs have been found in index`
			`missingBlobs := restic.NewBlobSet()`
			`for bh, count := range usedBlobs {`
			`if count == 0 {`
			`// blob does not exist in any pack files`
			`missingBlobs.Insert(bh)`
			`}`
			`}`

			`if len(missingBlobs) != 0 {`
			`printer.E("%v not found in the index\n\n"+`
			`"Integrity check failed: Data seems to be missing.\n"+`
			`"Will not start prune to prevent (additional) data loss!\n"+`
			`"Please report this error (along with the output of the 'prune' run) at\n"+`
			`"https://github.com/restic/restic/issues/new/choose\n", missingBlobs)`
			`return nil, nil, ErrIndexIncomplete`
			`}`

			`indexPack := make(map[restic.ID]packInfo)`

			`// save computed pack header size`
			`for pid, hdrSize := range pack.Size(ctx, idx, true) {`
			`// initialize tpe with NumBlobTypes to indicate it's not set`
			`indexPack[pid] = packInfo{tpe: restic.NumBlobTypes, usedSize: uint64(hdrSize)}`
			`}`

			`hasDuplicates := false`
			`// iterate over all blobs in index to generate packInfo`
			`idx.Each(ctx, func(blob restic.PackedBlob) {`
			`ip := indexPack[blob.PackID]`

			`// Set blob type if not yet set`
			`if ip.tpe == restic.NumBlobTypes {`
			`ip.tpe = blob.Type`
			`}`

			`// mark mixed packs with "Invalid blob type"`
			`if ip.tpe != blob.Type {`
			`ip.tpe = restic.InvalidBlob`
			`}`

			`bh := blob.BlobHandle`
			`size := uint64(blob.Length)`
			`dupCount := usedBlobs[bh]`
			`switch {`
			`case dupCount >= 2:`
			`hasDuplicates = true`
			`// mark as unused for now, we will later on select one copy`
			`ip.unusedSize += size`
			`ip.unusedBlobs++`

			`// count as duplicate, will later on change one copy to be counted as used`
			`stats.Size.Duplicate += size`
			`stats.Blobs.Duplicate++`
			`case dupCount == 1: // used blob, not duplicate`
			`ip.usedSize += size`
			`ip.usedBlobs++`

			`stats.Size.Used += size`
			`stats.Blobs.Used++`
			`default: // unused blob`
			`ip.unusedSize += size`
			`ip.unusedBlobs++`

			`stats.Size.Unused += size`
			`stats.Blobs.Unused++`
			`}`
			`if !blob.IsCompressed() {`
			`ip.uncompressed = true`
			`}`
			`// update indexPack`
			`indexPack[blob.PackID] = ip`
			`})`

			`// if duplicate blobs exist, those will be set to either "used" or "unused":`
			`// - mark only one occurrence of duplicate blobs as used`
			`// - if there are already some used blobs in a pack, possibly mark duplicates in this pack as "used"`
			`// - if there are no used blobs in a pack, possibly mark duplicates as "unused"`
			`if hasDuplicates {`
			`// iterate again over all blobs in index (this is pretty cheap, all in-mem)`
			`idx.Each(ctx, func(blob restic.PackedBlob) {`
			`bh := blob.BlobHandle`
			`count, ok := usedBlobs[bh]`
			`// skip non-duplicate, aka. normal blobs`
			`// count == 0 is used to mark that this was a duplicate blob with only a single occurrence remaining`
			`if !ok \|\| count == 1 {`
			`return`
			`}`

			`ip := indexPack[blob.PackID]`
			`size := uint64(blob.Length)`
			`switch {`
			`case ip.usedBlobs > 0, count == 0:`
			`// other used blobs in pack or "last" occurrence -> transition to used`
			`ip.usedSize += size`
			`ip.usedBlobs++`
			`ip.unusedSize -= size`
			`ip.unusedBlobs--`
			`// same for the global statistics`
			`stats.Size.Used += size`
			`stats.Blobs.Used++`
			`stats.Size.Duplicate -= size`
			`stats.Blobs.Duplicate--`
			`// let other occurrences remain marked as unused`
			`usedBlobs[bh] = 1`
			`default:`
			`// remain unused and decrease counter`
			`count--`
			`if count == 1 {`
			`// setting count to 1 would lead to forgetting that this blob had duplicates`
			`// thus use the special value zero. This will select the last instance of the blob for keeping.`
			`count = 0`
			`}`
			`usedBlobs[bh] = count`
			`}`
			`// update indexPack`
			`indexPack[blob.PackID] = ip`
			`})`
			`}`

			`// Sanity check. If no duplicates exist, all blobs have value 1. After handling`
			`// duplicates, this also applies to duplicates.`
			`for _, count := range usedBlobs {`
			`if count != 1 {`
			`panic("internal error during blob selection")`
			`}`
			`}`

			`return usedBlobs, indexPack, nil`
			`}`

			`func decidePackAction(ctx context.Context, opts PruneOptions, repo restic.Repository, indexPack map[restic.ID]packInfo, stats *PruneStats, printer progress.Printer) (PrunePlan, error) {`
			`removePacksFirst := restic.NewIDSet()`
			`removePacks := restic.NewIDSet()`
			`repackPacks := restic.NewIDSet()`

			`var repackCandidates []packInfoWithID`
			`var repackSmallCandidates []packInfoWithID`
			`repoVersion := repo.Config().Version`
			`// only repack very small files by default`
			`targetPackSize := repo.PackSize() / 25`
			`if opts.RepackSmall {`
			`// consider files with at least 80% of the target size as large enough`
			`targetPackSize = repo.PackSize() / 5 * 4`
			`}`

			`// loop over all packs and decide what to do`
			`bar := printer.NewCounter("packs processed")`
			`bar.SetMax(uint64(len(indexPack)))`
			`err := repo.List(ctx, restic.PackFile, func(id restic.ID, packSize int64) error {`
			`p, ok := indexPack[id]`
			`if !ok {`
			`// Pack was not referenced in index and is not used => immediately remove!`
			`printer.V("will remove pack %v as it is unused and not indexed\n", id.Str())`
			`removePacksFirst.Insert(id)`
			`stats.Size.Unref += uint64(packSize)`
			`return nil`
			`}`

			`if p.unusedSize+p.usedSize != uint64(packSize) && p.usedBlobs != 0 {`
			`// Pack size does not fit and pack is needed => error`
			`// If the pack is not needed, this is no error, the pack can`
			`// and will be simply removed, see below.`
			`printer.E("pack %s: calculated size %d does not match real size %d\nRun 'restic repair index'.\n",`
			`id.Str(), p.unusedSize+p.usedSize, packSize)`
			`return ErrSizeNotMatching`
			`}`

			`// statistics`
			`switch {`
			`case p.usedBlobs == 0:`
			`stats.Packs.Unused++`
			`case p.unusedBlobs == 0:`
			`stats.Packs.Used++`
			`default:`
			`stats.Packs.PartlyUsed++`
			`}`

			`if p.uncompressed {`
			`stats.Size.Uncompressed += p.unusedSize + p.usedSize`
			`}`
			`mustCompress := false`
			`if repoVersion >= 2 {`
			`// repo v2: always repack tree blobs if uncompressed`
			`// compress data blobs if requested`
			`mustCompress = (p.tpe == restic.TreeBlob \|\| opts.RepackUncompressed) && p.uncompressed`
			`}`

			`// decide what to do`
			`switch {`
			`case p.usedBlobs == 0:`
			`// All blobs in pack are no longer used => remove pack!`
			`removePacks.Insert(id)`
			`stats.Blobs.Remove += p.unusedBlobs`
			`stats.Size.Remove += p.unusedSize`

			`case opts.RepackCachableOnly && p.tpe == restic.DataBlob:`
			`// if this is a data pack and --repack-cacheable-only is set => keep pack!`
			`stats.Packs.Keep++`

			`case p.unusedBlobs == 0 && p.tpe != restic.InvalidBlob && !mustCompress:`
			`if packSize >= int64(targetPackSize) {`
			`// All blobs in pack are used and not mixed => keep pack!`
			`stats.Packs.Keep++`
			`} else {`
			`repackSmallCandidates = append(repackSmallCandidates, packInfoWithID{ID: id, packInfo: p, mustCompress: mustCompress})`
			`}`

			`default:`
			`// all other packs are candidates for repacking`
			`repackCandidates = append(repackCandidates, packInfoWithID{ID: id, packInfo: p, mustCompress: mustCompress})`
			`}`

			`delete(indexPack, id)`
			`bar.Add(1)`
			`return nil`
			`})`
			`bar.Done()`
			`if err != nil {`
			`return PrunePlan{}, err`
			`}`

			`// At this point indexPacks contains only missing packs!`

			`// missing packs that are not needed can be ignored`
			`ignorePacks := restic.NewIDSet()`
			`for id, p := range indexPack {`
			`if p.usedBlobs == 0 {`
			`ignorePacks.Insert(id)`
			`stats.Blobs.Remove += p.unusedBlobs`
			`stats.Size.Remove += p.unusedSize`
			`delete(indexPack, id)`
			`}`
			`}`

			`if len(indexPack) != 0 {`
			`printer.E("The index references %d needed pack files which are missing from the repository:\n", len(indexPack))`
			`for id := range indexPack {`
			`printer.E(" %v\n", id)`
			`}`
			`return PrunePlan{}, ErrPacksMissing`
			`}`
			`if len(ignorePacks) != 0 {`
			`printer.E("Missing but unneeded pack files are referenced in the index, will be repaired\n")`
			`for id := range ignorePacks {`
			`printer.E("will forget missing pack file %v\n", id)`
			`}`
			`}`

			`if len(repackSmallCandidates) < 10 {`
			`// too few small files to be worth the trouble, this also prevents endlessly repacking`
			`// if there is just a single pack file below the target size`
			`stats.Packs.Keep += uint(len(repackSmallCandidates))`
			`} else {`
			`repackCandidates = append(repackCandidates, repackSmallCandidates...)`
			`}`

			`// Sort repackCandidates such that packs with highest ratio unused/used space are picked first.`
			`// This is equivalent to sorting by unused / total space.`
			`// Instead of unused[i] / used[i] > unused[j] / used[j] we use`
			`// unused[i] * used[j] > unused[j] * used[i] as uint32*uint32 < uint64`
			`// Moreover packs containing trees and too small packs are sorted to the beginning`
			`sort.Slice(repackCandidates, func(i, j int) bool {`
			`pi := repackCandidates[i].packInfo`
			`pj := repackCandidates[j].packInfo`
			`switch {`
			`case pi.tpe != restic.DataBlob && pj.tpe == restic.DataBlob:`
			`return true`
			`case pj.tpe != restic.DataBlob && pi.tpe == restic.DataBlob:`
			`return false`
			`case pi.unusedSize+pi.usedSize < uint64(targetPackSize) && pj.unusedSize+pj.usedSize >= uint64(targetPackSize):`
			`return true`
			`case pj.unusedSize+pj.usedSize < uint64(targetPackSize) && pi.unusedSize+pi.usedSize >= uint64(targetPackSize):`
			`return false`
			`}`
			`return pi.unusedSizepj.usedSize > pj.unusedSizepi.usedSize`
			`})`

			`repack := func(id restic.ID, p packInfo) {`
			`repackPacks.Insert(id)`
			`stats.Blobs.Repack += p.unusedBlobs + p.usedBlobs`
			`stats.Size.Repack += p.unusedSize + p.usedSize`
			`stats.Blobs.Repackrm += p.unusedBlobs`
			`stats.Size.Repackrm += p.unusedSize`
			`if p.uncompressed {`
			`stats.Size.Uncompressed -= p.unusedSize + p.usedSize`
			`}`
			`}`

			`// calculate limit for number of unused bytes in the repo after repacking`
			`maxUnusedSizeAfter := opts.MaxUnusedBytes(stats.Size.Used)`

			`for _, p := range repackCandidates {`
			`reachedUnusedSizeAfter := (stats.Size.Unused-stats.Size.Remove-stats.Size.Repackrm < maxUnusedSizeAfter)`
			`reachedRepackSize := stats.Size.Repack+p.unusedSize+p.usedSize >= opts.MaxRepackBytes`
			`packIsLargeEnough := p.unusedSize+p.usedSize >= uint64(targetPackSize)`

			`switch {`
			`case reachedRepackSize:`
			`stats.Packs.Keep++`

			`case p.tpe != restic.DataBlob, p.mustCompress:`
			`// repacking non-data packs / uncompressed-trees is only limited by repackSize`
			`repack(p.ID, p.packInfo)`

			`case reachedUnusedSizeAfter && packIsLargeEnough:`
			`// for all other packs stop repacking if tolerated unused size is reached.`
			`stats.Packs.Keep++`

			`default:`
			`repack(p.ID, p.packInfo)`
			`}`
			`}`

			`stats.Packs.Unref = uint(len(removePacksFirst))`
			`stats.Packs.Repack = uint(len(repackPacks))`
			`stats.Packs.Remove = uint(len(removePacks))`

			`if repo.Config().Version < 2 {`
			`// compression not supported for repository format version 1`
			`stats.Size.Uncompressed = 0`
			`}`

			`return PrunePlan{removePacksFirst: removePacksFirst,`
			`removePacks: removePacks,`
			`repackPacks: repackPacks,`
			`ignorePacks: ignorePacks,`
			`}, nil`
			`}`

			`// DoPrune does the actual pruning:`
			`// - remove unreferenced packs first`
			`// - repack given pack files while keeping the given blobs`
			`// - rebuild the index while ignoring all files that will be deleted`
			`// - delete the files`
			`// plan.removePacks and plan.ignorePacks are modified in this function.`
			`func DoPrune(ctx context.Context, opts PruneOptions, repo restic.Repository, plan PrunePlan, printer progress.Printer) (err error) {`
			`if opts.DryRun {`
			`printer.V("Repeated prune dry-runs can report slightly different amounts of data to keep or repack. This is expected behavior.\n\n")`
			`if len(plan.removePacksFirst) > 0 {`
			`printer.V("Would have removed the following unreferenced packs:\n%v\n\n", plan.removePacksFirst)`
			`}`
			`printer.V("Would have repacked and removed the following packs:\n%v\n\n", plan.repackPacks)`
			`printer.V("Would have removed the following no longer used packs:\n%v\n\n", plan.removePacks)`
			`// Always quit here if DryRun was set!`
			`return nil`
			`}`

			`// unreferenced packs can be safely deleted first`
			`if len(plan.removePacksFirst) != 0 {`
			`printer.P("deleting unreferenced packs\n")`
			`_ = deleteFiles(ctx, true, repo, plan.removePacksFirst, restic.PackFile, printer)`
			`}`

			`if len(plan.repackPacks) != 0 {`
			`printer.P("repacking packs\n")`
			`bar := printer.NewCounter("packs repacked")`
			`bar.SetMax(uint64(len(plan.repackPacks)))`
			`_, err := Repack(ctx, repo, repo, plan.repackPacks, plan.keepBlobs, bar)`
			`bar.Done()`
			`if err != nil {`
			`return errors.Fatal(err.Error())`
			`}`

			`// Also remove repacked packs`
			`plan.removePacks.Merge(plan.repackPacks)`

			`if len(plan.keepBlobs) != 0 {`
			`printer.E("%v was not repacked\n\n"+`
			`"Integrity check failed.\n"+`
			`"Please report this error (along with the output of the 'prune' run) at\n"+`
			`"https://github.com/restic/restic/issues/new/choose\n", plan.keepBlobs)`
			`return errors.Fatal("internal error: blobs were not repacked")`
			`}`

			`// allow GC of the blob set`
			`plan.keepBlobs = nil`
			`}`

			`if len(plan.ignorePacks) == 0 {`
			`plan.ignorePacks = plan.removePacks`
			`} else {`
			`plan.ignorePacks.Merge(plan.removePacks)`
			`}`

			`if opts.UnsafeRecovery {`
			`printer.P("deleting index files\n")`
			`indexFiles := repo.Index().(*index.MasterIndex).IDs()`
			`err = deleteFiles(ctx, false, repo, indexFiles, restic.IndexFile, printer)`
			`if err != nil {`
			`return errors.Fatalf("%s", err)`
			`}`
			`} else if len(plan.ignorePacks) != 0 {`
			`err = rebuildIndexFiles(ctx, repo, plan.ignorePacks, nil, false, printer)`
			`if err != nil {`
			`return errors.Fatalf("%s", err)`
			`}`
			`}`

			`if len(plan.removePacks) != 0 {`
			`printer.P("removing %d old packs\n", len(plan.removePacks))`
			`_ = deleteFiles(ctx, true, repo, plan.removePacks, restic.PackFile, printer)`
			`}`

			`if opts.UnsafeRecovery {`
			`err = rebuildIndexFiles(ctx, repo, plan.ignorePacks, nil, true, printer)`
			`if err != nil {`
			`return errors.Fatalf("%s", err)`
			`}`
			`}`

			`printer.P("done\n")`
			`return nil`
			`}`

			`// deleteFiles deletes the given fileList of fileType in parallel`
			`// if ignoreError=true, it will print a warning if there was an error, else it will abort.`
			`func deleteFiles(ctx context.Context, ignoreError bool, repo restic.Repository, fileList restic.IDSet, fileType restic.FileType, printer progress.Printer) error {`
			`bar := printer.NewCounter("files deleted")`
			`defer bar.Done()`

			`return restic.ParallelRemove(ctx, repo, fileList, fileType, func(id restic.ID, err error) error {`
			`if err != nil {`
			`printer.E("unable to remove %v/%v from the repository\n", fileType, id)`
			`if !ignoreError {`
			`return err`
			`}`
			`}`
			`printer.VV("removed %v/%v\n", fileType, id)`
			`return nil`
			`}, bar)`
			`}`