1
0
Fork 0
mirror of https://github.com/restic/restic.git synced 2024-12-21 23:33:03 +00:00

Issue 5109

cmd_prune: Selection of packs to repack based on size
Add option --small-pack-size so prune can repack all packfiles smaller than small-pack-size bytes.
This commit is contained in:
Winfried Plappert 2024-12-16 14:57:30 +00:00
parent 6808004ad1
commit bdba108747
3 changed files with 44 additions and 14 deletions

View file

@ -0,0 +1,10 @@
Issue #5109 - Selection of packs to repack based on size
Add option --small-pack-size so prune can repack all packfiles smaller than small-pack-size bytes.
The new option is passed on to internal/repository/prune.go.
A new decision has been added to "decide what to do" to
move the intended packfiles onto the repackSmallCandidates queue.
The decision about what to do with the candidates in repackSmallCandidates has been modified,
so prune can be forced to perform the intended repacking operation by setting
--max-unused to a low value.

View file

@ -56,6 +56,9 @@ type PruneOptions struct {
MaxRepackSize string
MaxRepackBytes uint64
SmallPackSize string
SmallPackBytes uint64
RepackCacheableOnly bool
RepackSmall bool
RepackUncompressed bool
@ -74,10 +77,11 @@ func init() {
func addPruneOptions(c *cobra.Command, pruneOptions *PruneOptions) {
f := c.Flags()
f.StringVar(&pruneOptions.MaxUnused, "max-unused", "5%", "tolerate given `limit` of unused data (absolute value in bytes with suffixes k/K, m/M, g/G, t/T, a value in % or the word 'unlimited')")
f.StringVar(&pruneOptions.MaxRepackSize, "max-repack-size", "", "stop after repacking this much data in total (allowed suffixes for `size`: k/K, m/M, g/G, t/T)")
f.StringVar(&pruneOptions.MaxRepackSize, "max-repack-size", "", "maximum `size` to repack (allowed suffixes: k/K, m/M, g/G, t/T)")
f.BoolVar(&pruneOptions.RepackCacheableOnly, "repack-cacheable-only", false, "only repack packs which are cacheable")
f.BoolVar(&pruneOptions.RepackSmall, "repack-small", false, "repack pack files below 80% of target pack size")
f.BoolVar(&pruneOptions.RepackUncompressed, "repack-uncompressed", false, "repack all uncompressed data")
f.StringVar(&pruneOptions.SmallPackSize, "small-pack-size", "0", "pack `below-limit` packfiles smaller than (allowed suffixes: k/K, m/M, g/G, t/T)")
}
func verifyPruneOptions(opts *PruneOptions) error {
@ -136,6 +140,13 @@ func verifyPruneOptions(opts *PruneOptions) error {
}
}
smallPackSize := strings.TrimSpace(opts.SmallPackSize)
size, err := ui.ParseBytes(smallPackSize)
if err != nil {
return errors.Fatalf("invalid number of bytes %q for --small-pack-size: %v", opts.SmallPackSize, err)
}
opts.SmallPackBytes = uint64(size)
return nil
}
@ -149,11 +160,7 @@ func runPrune(ctx context.Context, opts PruneOptions, gopts GlobalOptions, term
return errors.Fatal("disabled compression and `--repack-uncompressed` are mutually exclusive")
}
if gopts.NoLock && !opts.DryRun {
return errors.Fatal("--no-lock is only applicable in combination with --dry-run for prune command")
}
ctx, repo, unlock, err := openWithExclusiveLock(ctx, gopts, opts.DryRun && gopts.NoLock)
ctx, repo, unlock, err := openWithExclusiveLock(ctx, gopts, false)
if err != nil {
return err
}
@ -192,6 +199,9 @@ func runPruneWithRepo(ctx context.Context, opts PruneOptions, gopts GlobalOption
MaxUnusedBytes: opts.maxUnusedBytes,
MaxRepackBytes: opts.MaxRepackBytes,
SmallPackSize: opts.SmallPackSize,
SmallPackBytes: opts.SmallPackBytes,
RepackCacheableOnly: opts.RepackCacheableOnly,
RepackSmall: opts.RepackSmall,
RepackUncompressed: opts.RepackUncompressed,

View file

@ -25,6 +25,9 @@ type PruneOptions struct {
MaxUnusedBytes func(used uint64) (unused uint64) // calculates the number of unused bytes after repacking, according to MaxUnused
MaxRepackBytes uint64
SmallPackSize string
SmallPackBytes uint64
RepackCacheableOnly bool
RepackSmall bool
RepackUncompressed bool
@ -323,6 +326,7 @@ func decidePackAction(ctx context.Context, opts PruneOptions, repo *Repository,
var repackCandidates []packInfoWithID
var repackSmallCandidates []packInfoWithID
var sizeRepackSmallCandidates int64
repoVersion := repo.Config().Version
// only repack very small files by default
targetPackSize := repo.packSize() / 25
@ -352,7 +356,6 @@ func decidePackAction(ctx context.Context, opts PruneOptions, repo *Repository,
id.Str(), p.unusedSize+p.usedSize, packSize)
return ErrSizeNotMatching
}
// statistics
switch {
case p.usedBlobs == 0:
@ -385,12 +388,18 @@ func decidePackAction(ctx context.Context, opts PruneOptions, repo *Repository,
// if this is a data pack and --repack-cacheable-only is set => keep pack!
stats.Packs.Keep++
//wpl 2024-11-08 - repack when packSize is smaller than opts.SmallPackBytes
case opts.RepackSmall && (uint64(packSize) <= opts.SmallPackBytes):
repackSmallCandidates = append(repackSmallCandidates, packInfoWithID{ID: id, packInfo: p, mustCompress: mustCompress})
sizeRepackSmallCandidates += packSize
case p.unusedBlobs == 0 && p.tpe != restic.InvalidBlob && !mustCompress:
if packSize >= int64(targetPackSize) {
// All blobs in pack are used and not mixed => keep pack!
stats.Packs.Keep++
} else {
repackSmallCandidates = append(repackSmallCandidates, packInfoWithID{ID: id, packInfo: p, mustCompress: mustCompress})
sizeRepackSmallCandidates += packSize
}
default:
@ -401,7 +410,8 @@ func decidePackAction(ctx context.Context, opts PruneOptions, repo *Repository,
delete(indexPack, id)
bar.Add(1)
return nil
})
}) // end repo.List(ctx, restic.PackFile ...)
bar.Done()
if err != nil {
return PrunePlan{}, err
@ -434,12 +444,15 @@ func decidePackAction(ctx context.Context, opts PruneOptions, repo *Repository,
}
}
if len(repackSmallCandidates) < 10 {
// calculate limit for number of unused bytes in the repo after repacking
maxUnusedSizeAfter := opts.MaxUnusedBytes(stats.Size.Used)
// wpl we need a mixed calculation based on the existing condition and on --max-unused
if len(repackSmallCandidates) >= 10 || opts.RepackSmall && (uint64(sizeRepackSmallCandidates) >= maxUnusedSizeAfter) {
repackCandidates = append(repackCandidates, repackSmallCandidates...)
} else {
// too few small files to be worth the trouble, this also prevents endlessly repacking
// if there is just a single pack file below the target size
stats.Packs.Keep += uint(len(repackSmallCandidates))
} else {
repackCandidates = append(repackCandidates, repackSmallCandidates...)
}
// Sort repackCandidates such that packs with highest ratio unused/used space are picked first.
@ -474,9 +487,6 @@ func decidePackAction(ctx context.Context, opts PruneOptions, repo *Repository,
}
}
// calculate limit for number of unused bytes in the repo after repacking
maxUnusedSizeAfter := opts.MaxUnusedBytes(stats.Size.Used)
for _, p := range repackCandidates {
reachedUnusedSizeAfter := (stats.Size.Unused-stats.Size.Remove-stats.Size.Repackrm < maxUnusedSizeAfter)
reachedRepackSize := stats.Size.Repack+p.unusedSize+p.usedSize >= opts.MaxRepackBytes