mirror of
https://github.com/borgbackup/borg.git
synced 2025-02-22 06:01:54 +00:00
prune: cleanup checkpoints
Kill all the checkpoints when pruning, except the latest one, which might be useful for continuing an interrupted backup. Also kill the latest checkpoint if it is already superseded by a successful backup. Note: this only works on checkpoints matched by --prefix; other checkpoints are not touched.
This commit is contained in:
parent
39c0757020
commit
dabf816364
2 changed files with 39 additions and 7 deletions
|
@ -783,13 +783,21 @@ def do_prune(self, args, repository, manifest, key):
|
|||
'"keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", '
|
||||
'"keep-weekly", "keep-monthly" or "keep-yearly" settings must be specified.')
|
||||
return self.exit_code
|
||||
archives = manifest.list_archive_infos(sort_by='ts', reverse=True) # just a ArchiveInfo list
|
||||
archives_checkpoints = manifest.list_archive_infos(sort_by='ts', reverse=True) # just a ArchiveInfo list
|
||||
if args.prefix:
|
||||
archives = [archive for archive in archives if archive.name.startswith(args.prefix)]
|
||||
archives_checkpoints = [arch for arch in archives_checkpoints if arch.name.startswith(args.prefix)]
|
||||
is_checkpoint = re.compile(r'\.checkpoint(\.\d+)?$').search
|
||||
checkpoints = [arch for arch in archives_checkpoints if is_checkpoint(arch.name)]
|
||||
# keep the latest checkpoint, if there is no later non-checkpoint archive
|
||||
latest_checkpoint = checkpoints[0] if checkpoints else None
|
||||
if archives_checkpoints[0] is latest_checkpoint:
|
||||
keep_checkpoints = [latest_checkpoint, ]
|
||||
else:
|
||||
keep_checkpoints = []
|
||||
checkpoints = set(checkpoints)
|
||||
# ignore all checkpoint archives to avoid keeping one (which is an incomplete backup)
|
||||
# that is newer than a successfully completed backup - and killing the successful backup.
|
||||
is_checkpoint = re.compile(r'\.checkpoint(\.\d+)?$').search
|
||||
archives = [archive for archive in archives if not is_checkpoint(archive.name)]
|
||||
archives = [arch for arch in archives_checkpoints if arch not in checkpoints]
|
||||
keep = []
|
||||
if args.within:
|
||||
keep += prune_within(archives, args.within)
|
||||
|
@ -807,11 +815,10 @@ def do_prune(self, args, repository, manifest, key):
|
|||
keep += prune_split(archives, '%Y-%m', args.monthly, keep)
|
||||
if args.yearly:
|
||||
keep += prune_split(archives, '%Y', args.yearly, keep)
|
||||
|
||||
to_delete = set(archives) - set(keep)
|
||||
to_delete = (set(archives) | checkpoints) - (set(keep) | set(keep_checkpoints))
|
||||
stats = Statistics()
|
||||
with Cache(repository, key, manifest, do_files=args.cache_files, lock_wait=self.lock_wait) as cache:
|
||||
for archive in archives:
|
||||
for archive in archives_checkpoints:
|
||||
if archive in to_delete:
|
||||
if args.dry_run:
|
||||
if args.output_list:
|
||||
|
@ -1628,11 +1635,20 @@ def build_parser(self, args=None, prog=None):
|
|||
any of the specified retention options. This command is normally used by
|
||||
automated backup scripts wanting to keep a certain number of historic backups.
|
||||
|
||||
Also, prune automatically removes checkpoint archives (incomplete archives left
|
||||
behind by interrupted backup runs) except if the checkpoint is the latest
|
||||
archive (and thus still needed). Checkpoint archives are not considered when
|
||||
comparing archive counts against the retention limits (--keep-*).
|
||||
|
||||
If a prefix is set with -P, then only archives that start with the prefix are
|
||||
considered for deletion and only those archives count towards the totals
|
||||
specified by the rules.
|
||||
Otherwise, *all* archives in the repository are candidates for deletion!
|
||||
|
||||
If you have multiple sequences of archives with different data sets (e.g.
|
||||
from different machines) in one shared repository, use one prune call per
|
||||
data set that matches only the respective archives using the -P option.
|
||||
|
||||
The "--keep-within" option takes an argument of the form "<int><char>",
|
||||
where char is "H", "d", "w", "m", "y". For example, "--keep-within 2d" means
|
||||
to keep all archives that were created within the past 48 hours.
|
||||
|
|
|
@ -991,20 +991,36 @@ def test_prune_repository(self):
|
|||
# these are not really a checkpoints, but they look like some:
|
||||
self.cmd('create', self.repository_location + '::test3.checkpoint', src_dir)
|
||||
self.cmd('create', self.repository_location + '::test3.checkpoint.1', src_dir)
|
||||
self.cmd('create', self.repository_location + '::test4.checkpoint', src_dir)
|
||||
output = self.cmd('prune', '-v', '--list', '--dry-run', self.repository_location, '--keep-daily=2')
|
||||
self.assert_in('Would prune: test1', output)
|
||||
# must keep the latest non-checkpoint archive:
|
||||
self.assert_in('Keeping archive: test2', output)
|
||||
# must keep the latest checkpoint archive:
|
||||
self.assert_in('Keeping archive: test4.checkpoint', output)
|
||||
output = self.cmd('list', self.repository_location)
|
||||
self.assert_in('test1', output)
|
||||
self.assert_in('test2', output)
|
||||
self.assert_in('test3.checkpoint', output)
|
||||
self.assert_in('test3.checkpoint.1', output)
|
||||
self.assert_in('test4.checkpoint', output)
|
||||
self.cmd('prune', self.repository_location, '--keep-daily=2')
|
||||
output = self.cmd('list', self.repository_location)
|
||||
self.assert_not_in('test1', output)
|
||||
# the latest non-checkpoint archive must be still there:
|
||||
self.assert_in('test2', output)
|
||||
# only the latest checkpoint archive must still be there:
|
||||
self.assert_not_in('test3.checkpoint', output)
|
||||
self.assert_not_in('test3.checkpoint.1', output)
|
||||
self.assert_in('test4.checkpoint', output)
|
||||
# now we supercede the latest checkpoint by a successful backup:
|
||||
self.cmd('create', self.repository_location + '::test5', src_dir)
|
||||
self.cmd('prune', self.repository_location, '--keep-daily=2')
|
||||
output = self.cmd('list', self.repository_location)
|
||||
# all checkpoints should be gone now:
|
||||
self.assert_not_in('checkpoint', output)
|
||||
# the latest archive must be still there
|
||||
self.assert_in('test5', output)
|
||||
|
||||
def test_prune_repository_save_space(self):
|
||||
self.cmd('init', self.repository_location)
|
||||
|
|
Loading…
Reference in a new issue