1
0
Fork 0
mirror of https://github.com/borgbackup/borg.git synced 2025-02-22 06:01:54 +00:00

prune: cleanup checkpoints

kill all the checkpoints when pruning, except the latest one, which might be
useful for continuing an interrupted backup. also kill the latest checkpoint
if it has already been superseded by a successful backup.

note: if --prefix is given, this only affects checkpoints matching the prefix;
      other checkpoints are not touched.
This commit is contained in:
Thomas Waldmann 2016-05-03 23:06:26 +02:00
parent 39c0757020
commit dabf816364
2 changed files with 39 additions and 7 deletions

View file

@ -783,13 +783,21 @@ def do_prune(self, args, repository, manifest, key):
'"keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", '
'"keep-weekly", "keep-monthly" or "keep-yearly" settings must be specified.')
return self.exit_code
archives = manifest.list_archive_infos(sort_by='ts', reverse=True) # just a ArchiveInfo list
archives_checkpoints = manifest.list_archive_infos(sort_by='ts', reverse=True) # just a ArchiveInfo list
if args.prefix:
archives = [archive for archive in archives if archive.name.startswith(args.prefix)]
archives_checkpoints = [arch for arch in archives_checkpoints if arch.name.startswith(args.prefix)]
is_checkpoint = re.compile(r'\.checkpoint(\.\d+)?$').search
checkpoints = [arch for arch in archives_checkpoints if is_checkpoint(arch.name)]
# keep the latest checkpoint, if there is no later non-checkpoint archive
latest_checkpoint = checkpoints[0] if checkpoints else None
if archives_checkpoints[0] is latest_checkpoint:
keep_checkpoints = [latest_checkpoint, ]
else:
keep_checkpoints = []
checkpoints = set(checkpoints)
# ignore all checkpoint archives to avoid keeping one (which is an incomplete backup)
# that is newer than a successfully completed backup - and killing the successful backup.
is_checkpoint = re.compile(r'\.checkpoint(\.\d+)?$').search
archives = [archive for archive in archives if not is_checkpoint(archive.name)]
archives = [arch for arch in archives_checkpoints if arch not in checkpoints]
keep = []
if args.within:
keep += prune_within(archives, args.within)
@ -807,11 +815,10 @@ def do_prune(self, args, repository, manifest, key):
keep += prune_split(archives, '%Y-%m', args.monthly, keep)
if args.yearly:
keep += prune_split(archives, '%Y', args.yearly, keep)
to_delete = set(archives) - set(keep)
to_delete = (set(archives) | checkpoints) - (set(keep) | set(keep_checkpoints))
stats = Statistics()
with Cache(repository, key, manifest, do_files=args.cache_files, lock_wait=self.lock_wait) as cache:
for archive in archives:
for archive in archives_checkpoints:
if archive in to_delete:
if args.dry_run:
if args.output_list:
@ -1628,11 +1635,20 @@ def build_parser(self, args=None, prog=None):
any of the specified retention options. This command is normally used by
automated backup scripts wanting to keep a certain number of historic backups.
Also, prune automatically removes checkpoint archives (incomplete archives left
behind by interrupted backup runs) except if the checkpoint is the latest
archive (and thus still needed). Checkpoint archives are not considered when
comparing archive counts against the retention limits (--keep-*).
If a prefix is set with -P, then only archives that start with the prefix are
considered for deletion and only those archives count towards the totals
specified by the rules.
Otherwise, *all* archives in the repository are candidates for deletion!
If you have multiple sequences of archives with different data sets (e.g.
from different machines) in one shared repository, use one prune call per
data set that matches only the respective archives using the -P option.
The "--keep-within" option takes an argument of the form "<int><char>",
where char is "H", "d", "w", "m", "y". For example, "--keep-within 2d" means
to keep all archives that were created within the past 48 hours.

View file

@ -991,20 +991,36 @@ def test_prune_repository(self):
# these are not really checkpoints, but they look like some:
self.cmd('create', self.repository_location + '::test3.checkpoint', src_dir)
self.cmd('create', self.repository_location + '::test3.checkpoint.1', src_dir)
self.cmd('create', self.repository_location + '::test4.checkpoint', src_dir)
output = self.cmd('prune', '-v', '--list', '--dry-run', self.repository_location, '--keep-daily=2')
self.assert_in('Would prune: test1', output)
# must keep the latest non-checkpoint archive:
self.assert_in('Keeping archive: test2', output)
# must keep the latest checkpoint archive:
self.assert_in('Keeping archive: test4.checkpoint', output)
output = self.cmd('list', self.repository_location)
self.assert_in('test1', output)
self.assert_in('test2', output)
self.assert_in('test3.checkpoint', output)
self.assert_in('test3.checkpoint.1', output)
self.assert_in('test4.checkpoint', output)
self.cmd('prune', self.repository_location, '--keep-daily=2')
output = self.cmd('list', self.repository_location)
self.assert_not_in('test1', output)
# the latest non-checkpoint archive must be still there:
self.assert_in('test2', output)
# only the latest checkpoint archive must still be there:
self.assert_not_in('test3.checkpoint', output)
self.assert_not_in('test3.checkpoint.1', output)
self.assert_in('test4.checkpoint', output)
# now we supersede the latest checkpoint with a successful backup:
self.cmd('create', self.repository_location + '::test5', src_dir)
self.cmd('prune', self.repository_location, '--keep-daily=2')
output = self.cmd('list', self.repository_location)
# all checkpoints should be gone now:
self.assert_not_in('checkpoint', output)
# the latest archive must be still there
self.assert_in('test5', output)
def test_prune_repository_save_space(self):
self.cmd('init', self.repository_location)