mirror of
https://github.com/restic/restic.git
synced 2024-12-21 23:33:03 +00:00
Issue 5109
cmd_prune: Selection of packs to repack based on size. Add option --small-pack-size so prune can repack all packfiles smaller than small-pack-size bytes.
This commit is contained in:
parent
6808004ad1
commit
bdba108747
3 changed files with 44 additions and 14 deletions
10
changelog/unreleased/issue-5109
Normal file
10
changelog/unreleased/issue-5109
Normal file
|
@ -0,0 +1,10 @@
|
|||
Issue #5109 - Selection of packs to repack based on size
|
||||
|
||||
Add option --small-pack-size so prune can repack all packfiles smaller than small-pack-size bytes.
|
||||
The new option is passed on to internal/repository/prune.go.
|
||||
A new decision has been added to "decide what to do" to
|
||||
move the intended packfiles onto the repackSmallCandidates queue.
|
||||
|
||||
The decision about what to do with the candidates in repackSmallCandidates has been modified,
|
||||
so prune can be forced to perform the intended repacking operation by setting
|
||||
--max-unused to a low value.
|
|
@ -56,6 +56,9 @@ type PruneOptions struct {
|
|||
MaxRepackSize string
|
||||
MaxRepackBytes uint64
|
||||
|
||||
SmallPackSize string
|
||||
SmallPackBytes uint64
|
||||
|
||||
RepackCacheableOnly bool
|
||||
RepackSmall bool
|
||||
RepackUncompressed bool
|
||||
|
@ -74,10 +77,11 @@ func init() {
|
|||
func addPruneOptions(c *cobra.Command, pruneOptions *PruneOptions) {
|
||||
f := c.Flags()
|
||||
f.StringVar(&pruneOptions.MaxUnused, "max-unused", "5%", "tolerate given `limit` of unused data (absolute value in bytes with suffixes k/K, m/M, g/G, t/T, a value in % or the word 'unlimited')")
|
||||
f.StringVar(&pruneOptions.MaxRepackSize, "max-repack-size", "", "stop after repacking this much data in total (allowed suffixes for `size`: k/K, m/M, g/G, t/T)")
|
||||
f.StringVar(&pruneOptions.MaxRepackSize, "max-repack-size", "", "maximum `size` to repack (allowed suffixes: k/K, m/M, g/G, t/T)")
|
||||
f.BoolVar(&pruneOptions.RepackCacheableOnly, "repack-cacheable-only", false, "only repack packs which are cacheable")
|
||||
f.BoolVar(&pruneOptions.RepackSmall, "repack-small", false, "repack pack files below 80% of target pack size")
|
||||
f.BoolVar(&pruneOptions.RepackUncompressed, "repack-uncompressed", false, "repack all uncompressed data")
|
||||
f.StringVar(&pruneOptions.SmallPackSize, "small-pack-size", "0", "pack `below-limit` packfiles smaller than (allowed suffixes: k/K, m/M, g/G, t/T)")
|
||||
}
|
||||
|
||||
func verifyPruneOptions(opts *PruneOptions) error {
|
||||
|
@ -136,6 +140,13 @@ func verifyPruneOptions(opts *PruneOptions) error {
|
|||
}
|
||||
}
|
||||
|
||||
smallPackSize := strings.TrimSpace(opts.SmallPackSize)
|
||||
size, err := ui.ParseBytes(smallPackSize)
|
||||
if err != nil {
|
||||
return errors.Fatalf("invalid number of bytes %q for --small-pack-size: %v", opts.SmallPackSize, err)
|
||||
}
|
||||
opts.SmallPackBytes = uint64(size)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -149,11 +160,7 @@ func runPrune(ctx context.Context, opts PruneOptions, gopts GlobalOptions, term
|
|||
return errors.Fatal("disabled compression and `--repack-uncompressed` are mutually exclusive")
|
||||
}
|
||||
|
||||
if gopts.NoLock && !opts.DryRun {
|
||||
return errors.Fatal("--no-lock is only applicable in combination with --dry-run for prune command")
|
||||
}
|
||||
|
||||
ctx, repo, unlock, err := openWithExclusiveLock(ctx, gopts, opts.DryRun && gopts.NoLock)
|
||||
ctx, repo, unlock, err := openWithExclusiveLock(ctx, gopts, false)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -192,6 +199,9 @@ func runPruneWithRepo(ctx context.Context, opts PruneOptions, gopts GlobalOption
|
|||
MaxUnusedBytes: opts.maxUnusedBytes,
|
||||
MaxRepackBytes: opts.MaxRepackBytes,
|
||||
|
||||
SmallPackSize: opts.SmallPackSize,
|
||||
SmallPackBytes: opts.SmallPackBytes,
|
||||
|
||||
RepackCacheableOnly: opts.RepackCacheableOnly,
|
||||
RepackSmall: opts.RepackSmall,
|
||||
RepackUncompressed: opts.RepackUncompressed,
|
||||
|
|
|
@ -25,6 +25,9 @@ type PruneOptions struct {
|
|||
MaxUnusedBytes func(used uint64) (unused uint64) // calculates the number of unused bytes after repacking, according to MaxUnused
|
||||
MaxRepackBytes uint64
|
||||
|
||||
SmallPackSize string
|
||||
SmallPackBytes uint64
|
||||
|
||||
RepackCacheableOnly bool
|
||||
RepackSmall bool
|
||||
RepackUncompressed bool
|
||||
|
@ -323,6 +326,7 @@ func decidePackAction(ctx context.Context, opts PruneOptions, repo *Repository,
|
|||
|
||||
var repackCandidates []packInfoWithID
|
||||
var repackSmallCandidates []packInfoWithID
|
||||
var sizeRepackSmallCandidates int64
|
||||
repoVersion := repo.Config().Version
|
||||
// only repack very small files by default
|
||||
targetPackSize := repo.packSize() / 25
|
||||
|
@ -352,7 +356,6 @@ func decidePackAction(ctx context.Context, opts PruneOptions, repo *Repository,
|
|||
id.Str(), p.unusedSize+p.usedSize, packSize)
|
||||
return ErrSizeNotMatching
|
||||
}
|
||||
|
||||
// statistics
|
||||
switch {
|
||||
case p.usedBlobs == 0:
|
||||
|
@ -385,12 +388,18 @@ func decidePackAction(ctx context.Context, opts PruneOptions, repo *Repository,
|
|||
// if this is a data pack and --repack-cacheable-only is set => keep pack!
|
||||
stats.Packs.Keep++
|
||||
|
||||
//wpl 2024-11-08 - repack when packSize is smaller than opts.SmallPackBytes
|
||||
case opts.RepackSmall && (uint64(packSize) <= opts.SmallPackBytes):
|
||||
repackSmallCandidates = append(repackSmallCandidates, packInfoWithID{ID: id, packInfo: p, mustCompress: mustCompress})
|
||||
sizeRepackSmallCandidates += packSize
|
||||
|
||||
case p.unusedBlobs == 0 && p.tpe != restic.InvalidBlob && !mustCompress:
|
||||
if packSize >= int64(targetPackSize) {
|
||||
// All blobs in pack are used and not mixed => keep pack!
|
||||
stats.Packs.Keep++
|
||||
} else {
|
||||
repackSmallCandidates = append(repackSmallCandidates, packInfoWithID{ID: id, packInfo: p, mustCompress: mustCompress})
|
||||
sizeRepackSmallCandidates += packSize
|
||||
}
|
||||
|
||||
default:
|
||||
|
@ -401,7 +410,8 @@ func decidePackAction(ctx context.Context, opts PruneOptions, repo *Repository,
|
|||
delete(indexPack, id)
|
||||
bar.Add(1)
|
||||
return nil
|
||||
})
|
||||
}) // end repo.List(ctx, restic.PackFile ...)
|
||||
|
||||
bar.Done()
|
||||
if err != nil {
|
||||
return PrunePlan{}, err
|
||||
|
@ -434,12 +444,15 @@ func decidePackAction(ctx context.Context, opts PruneOptions, repo *Repository,
|
|||
}
|
||||
}
|
||||
|
||||
if len(repackSmallCandidates) < 10 {
|
||||
// calculate limit for number of unused bytes in the repo after repacking
|
||||
maxUnusedSizeAfter := opts.MaxUnusedBytes(stats.Size.Used)
|
||||
// wpl we need a mixed calculation based the existing condition and on --max-unused
|
||||
if len(repackSmallCandidates) >= 10 || opts.RepackSmall && (uint64(sizeRepackSmallCandidates) >= maxUnusedSizeAfter) {
|
||||
repackCandidates = append(repackCandidates, repackSmallCandidates...)
|
||||
} else {
|
||||
// too few small files to be worth the trouble, this also prevents endlessly repacking
|
||||
// if there is just a single pack file below the target size
|
||||
stats.Packs.Keep += uint(len(repackSmallCandidates))
|
||||
} else {
|
||||
repackCandidates = append(repackCandidates, repackSmallCandidates...)
|
||||
}
|
||||
|
||||
// Sort repackCandidates such that packs with highest ratio unused/used space are picked first.
|
||||
|
@ -474,9 +487,6 @@ func decidePackAction(ctx context.Context, opts PruneOptions, repo *Repository,
|
|||
}
|
||||
}
|
||||
|
||||
// calculate limit for number of unused bytes in the repo after repacking
|
||||
maxUnusedSizeAfter := opts.MaxUnusedBytes(stats.Size.Used)
|
||||
|
||||
for _, p := range repackCandidates {
|
||||
reachedUnusedSizeAfter := (stats.Size.Unused-stats.Size.Remove-stats.Size.Repackrm < maxUnusedSizeAfter)
|
||||
reachedRepackSize := stats.Size.Repack+p.unusedSize+p.usedSize >= opts.MaxRepackBytes
|
||||
|
|
Loading…
Reference in a new issue