1
0
Fork 0
mirror of https://github.com/borgbackup/borg.git synced 2025-03-07 03:59:19 +00:00

Merge pull request from ThomasWaldmann/prune-quarterly-master

Add quarterly pruning strategy.
This commit is contained in:
TW 2024-10-06 12:12:13 +02:00 committed by GitHub
commit ee386d0bef
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 189 additions and 16 deletions
docs/misc
src/borg
archiver
testsuite/archiver

View file

@ -100,3 +100,27 @@ example simple. They all work in basically the same way.
The weekly rule is easy to understand roughly, but hard to understand in all
details. If interested, read "ISO 8601:2000 standard week-based year".
The 13weekly and 3monthly rules are two different strategies for keeping one
every quarter of a year. There are `multiple ways`_ to define a quarter-year;
borg prune recognizes two:
* --keep-13weekly keeps one backup every 13 weeks using ISO 8601:2000's
definition of the week-based year. January 4th is always included in the
first week of a year, and January 1st to 3rd may be in week 52 or 53 of the
previous year. Week 53 is also in the fourth quarter of the year.
* --keep-3monthly keeps one backup every 3 months. January 1st to
March 31st, April 1st to June 30th, July 1st to September 30th, and October 1st
to December 31st form the quarters.
If the subtleties of the definition of a quarter year don't matter to you, a
short summary of behavior is:
* --keep-13weekly favors keeping backups at the beginning of Jan, Apr, Jul,
and Oct.
* --keep-3monthly favors keeping backups at the end of Dec, Mar, Jun, and Sep.
* Both strategies will have some overlap in which backups are kept.
* The differences are negligible unless backups considered for deletion were
created weekly or more frequently.
.. _multiple ways: https://en.wikipedia.org/wiki/Calendar_year#Quarter_year

View file

@ -30,15 +30,62 @@ def prune_within(archives, hours, kept_because):
return result
def default_period_func(pattern):
    """Build a period function that formats an archive's timestamp with *pattern*.

    :param pattern: ``strftime`` format string, e.g. ``"%Y-%m"``.
    :return: a function that takes an archive-like object exposing a ``ts``
        datetime attribute and returns the formatted period string.
    """

    def inner(a):
        # compute in local timezone
        return a.ts.astimezone().strftime(pattern)

    return inner
def quarterly_13weekly_period_func(a):
    """Return the ``(ISO year, quarter)`` period of archive *a* (13-week strategy).

    The quarter is derived from the ISO 8601 week number of ``a.ts`` in local
    time: weeks 1-13, 14-26 and 27-39 form quarters 1-3, and every later week
    (including a week 53) belongs to quarter 4. The year is the ISO week-based
    year, so it can differ from the calendar year around New Year.

    :param a: archive-like object exposing a ``ts`` datetime attribute.
    :return: tuple ``(year, quarter)`` with ``quarter`` in 1..4.
    """
    (year, week, _) = a.ts.astimezone().isocalendar()  # local time
    if week <= 13:
        # Weeks containing Jan 4th to Mar 28th (leap year) or 29th - 91 (13*7)
        # days later.
        return (year, 1)
    if week <= 26:
        # Weeks containing Apr 4th (leap year) or 5th to Jun 27th or 28th - 91
        # days later.
        return (year, 2)
    if week <= 39:
        # Weeks containing Jul 4th (leap year) or 5th to Sep 26th or 27th -
        # at least 91 days later.
        return (year, 3)
    # Everything else, Oct 3rd (leap year) or 4th onward, will always
    # include week of Dec 26th (leap year) or Dec 27th, may also include
    # up to possibly Jan 3rd of next year.
    return (year, 4)
def quarterly_3monthly_period_func(a):
    """Return the ``(year, quarter)`` period of archive *a* (3-month strategy).

    Quarters are calendar quarters of ``a.ts`` in local time:
    1-1 to 3-31, 4-1 to 6-30, 7-1 to 9-30 and 10-1 to 12-31.

    :param a: archive-like object exposing a ``ts`` datetime attribute.
    :return: tuple ``(year, quarter)`` with ``quarter`` in 1..4.
    """
    lt = a.ts.astimezone()  # local time
    # months 1-3 -> 1, 4-6 -> 2, 7-9 -> 3, 10-12 -> 4
    return (lt.year, (lt.month - 1) // 3 + 1)
# Maps each pruning rule name to a function that takes an archive and returns
# its period key; prune_split() looks the function up by rule name and compares
# the keys of consecutive archives. Entries are listed from the finest period
# to the coarsest.
PRUNING_PATTERNS = OrderedDict(
    [
        ("secondly", default_period_func("%Y-%m-%d %H:%M:%S")),
        ("minutely", default_period_func("%Y-%m-%d %H:%M")),
        ("hourly", default_period_func("%Y-%m-%d %H")),
        ("daily", default_period_func("%Y-%m-%d")),
        ("weekly", default_period_func("%G-%V")),
        ("monthly", default_period_func("%Y-%m")),
        ("quarterly_13weekly", quarterly_13weekly_period_func),
        ("quarterly_3monthly", quarterly_3monthly_period_func),
        ("yearly", default_period_func("%Y")),
    ]
)
@ -46,7 +93,7 @@ PRUNING_PATTERNS = OrderedDict(
def prune_split(archives, rule, n, kept_because=None):
last = None
keep = []
pattern = PRUNING_PATTERNS[rule]
period_func = PRUNING_PATTERNS[rule]
if kept_because is None:
kept_because = {}
if n == 0:
@ -54,8 +101,7 @@ def prune_split(archives, rule, n, kept_because=None):
a = None
for a in sorted(archives, key=attrgetter("ts"), reverse=True):
# we compute the pruning in local time zone
period = a.ts.astimezone().strftime(pattern)
period = period_func(a)
if period != last:
last = period
if a.id not in kept_because:
@ -75,12 +121,24 @@ class PruneMixIn:
def do_prune(self, args, repository, manifest):
"""Prune repository archives according to specified rules"""
if not any(
(args.secondly, args.minutely, args.hourly, args.daily, args.weekly, args.monthly, args.yearly, args.within)
(
args.secondly,
args.minutely,
args.hourly,
args.daily,
args.weekly,
args.monthly,
args.quarterly_13weekly,
args.quarterly_3monthly,
args.yearly,
args.within,
)
):
raise CommandError(
'At least one of the "keep-within", "keep-last", '
'"keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", '
'"keep-weekly", "keep-monthly" or "keep-yearly" settings must be specified.'
'"keep-weekly", "keep-monthly", "keep-13weekly", "keep-3monthly", '
'or "keep-yearly" settings must be specified.'
)
if args.format is not None:
@ -190,10 +248,15 @@ class PruneMixIn:
starts is used for pruning purposes. Dates and times are interpreted in the local
timezone of the system where borg prune runs, and weeks go from Monday to Sunday.
Specifying a negative number of archives to keep means that there is no limit.
As of borg 1.2.0, borg will retain the oldest archive if any of the secondly,
minutely, hourly, daily, weekly, monthly, or yearly rules was not otherwise able to
meet its retention target. This enables the first chronological archive to continue
aging until it is replaced by a newer archive that meets the retention criteria.
Borg will retain the oldest archive if any of the secondly, minutely, hourly,
daily, weekly, monthly, quarterly, or yearly rules was not otherwise able to
meet its retention target. This enables the first chronological archive to
continue aging until it is replaced by a newer archive that meets the retention
criteria.
The ``--keep-13weekly`` and ``--keep-3monthly`` rules are two different
strategies for keeping archives every quarter year.
The ``--keep-last N`` option is doing the same as ``--keep-secondly N`` (and it will
keep the last N archives under the assumption that you do not create more than one
@ -293,6 +356,21 @@ class PruneMixIn:
action=Highlander,
help="number of monthly archives to keep",
)
quarterly_group = subparser.add_mutually_exclusive_group()
quarterly_group.add_argument(
"--keep-13weekly",
dest="quarterly_13weekly",
type=int,
default=0,
help="number of quarterly archives to keep (13 week strategy)",
)
quarterly_group.add_argument(
"--keep-3monthly",
dest="quarterly_3monthly",
type=int,
default=0,
help="number of quarterly archives to keep (3 month strategy)",
)
subparser.add_argument(
"-y",
"--keep-yearly",

View file

@ -100,6 +100,77 @@ def test_prune_repository_example(archivers, request):
assert "test%02d" % i not in output
def test_prune_quarterly(archivers, request):
    # Example worked through by hand when developing quarterly
    # strategy, based upon existing backups where quarterly strategy
    # is desired. Weekly/monthly backups that don't affect results were
    # trimmed to speed up the test.
    #
    # Week numbers for the two dates of each row pair are shown in the
    # comment on the second row. Year is also shown when it doesn't match
    # the year given in the date tuple.
    archiver = request.getfixturevalue(archivers)
    test_dates = [
        (2020, 12, 6),
        (2021, 1, 3),  # 49, 2020-53
        (2021, 3, 28),
        (2021, 4, 25),  # 12, 16
        (2021, 6, 27),
        (2021, 7, 4),  # 25, 26
        (2021, 9, 26),
        (2021, 10, 3),  # 38, 39
        (2021, 12, 26),
        (2022, 1, 2),  # 51, 2021-52
    ]

    def mk_name(tup):
        (y, m, d) = tup
        suff = datetime(y, m, d).strftime("%Y-%m-%d")
        return f"test-{suff}"

    # The kept repos are based on working on an example by hand,
    # archives made on the following dates should be kept:
    EXPECTED_KEPT = {
        "13weekly": [(2020, 12, 6), (2021, 1, 3), (2021, 3, 28), (2021, 7, 4), (2021, 10, 3), (2022, 1, 2)],
        "3monthly": [(2020, 12, 6), (2021, 3, 28), (2021, 6, 27), (2021, 9, 26), (2021, 12, 26), (2022, 1, 2)],
    }

    for strat, to_keep in EXPECTED_KEPT.items():
        # Initialize our repo.
        cmd(archiver, "repo-create", RK_ENCRYPTION)
        for a, (y, m, d) in zip(map(mk_name, test_dates), test_dates):
            _create_archive_ts(archiver, a, y, m, d)

        to_prune = list(set(test_dates) - set(to_keep))

        # Use 99 instead of -1 to test that oldest backup is kept.
        output = cmd(archiver, "prune", "--list", "--dry-run", f"--keep-{strat}=99")
        for a in map(mk_name, to_prune):
            assert re.search(rf"Would prune:\s+{a}", output)

        # With the 13-week strategy, 2020-12-06 (week 49) shares its quarter
        # with the newer 2021-01-03 archive (week 2020-53), so it is only kept
        # as the oldest archive. With the 3-month strategy it is the sole
        # archive of its quarter and is kept by the rule itself.
        oldest = r"\[oldest\]" if strat == "13weekly" else ""
        assert re.search(rf"Keeping archive \(rule: quarterly_{strat}{oldest} #\d+\):\s+test-2020-12-06", output)
        for a in map(mk_name, to_keep[1:]):
            assert re.search(rf"Keeping archive \(rule: quarterly_{strat} #\d+\):\s+{a}", output)

        output = cmd(archiver, "repo-list")
        # Nothing pruned after dry run
        for a in map(mk_name, test_dates):
            assert a in output

        cmd(archiver, "prune", f"--keep-{strat}=99")
        output = cmd(archiver, "repo-list")
        # All matching backups plus oldest kept
        for a in map(mk_name, to_keep):
            assert a in output
        # Other backups have been pruned
        for a in map(mk_name, to_prune):
            assert a not in output

        # Delete repo and begin anew
        cmd(archiver, "repo-delete")
# With an initial and daily backup, prune daily until oldest is replaced by a monthly backup
def test_prune_retain_and_expire_oldest(archivers, request):
archiver = request.getfixturevalue(archivers)