import argparse
from collections import OrderedDict
from datetime import datetime, timezone, timedelta
import logging
from operator import attrgetter
import os
import re

from ._common import with_repository, Highlander
from ..archive import Archive, Statistics
from ..cache import Cache
from ..constants import *  # NOQA
from ..helpers import ArchiveFormatter, interval, sig_int, log_multi, ProgressIndicatorPercent, CommandError, Error
from ..manifest import Manifest

from ..logger import create_logger

logger = create_logger()


def prune_within(archives, hours, kept_because):
    target = datetime.now(timezone.utc) - timedelta(seconds=hours * 3600)
    kept_counter = 0
    result = []
    for a in archives:
        if a.ts > target:
            kept_counter += 1
            kept_because[a.id] = ("within", kept_counter)
            result.append(a)
    return result


PRUNING_PATTERNS = OrderedDict(
    [
        ("secondly", "%Y-%m-%d %H:%M:%S"),
        ("minutely", "%Y-%m-%d %H:%M"),
        ("hourly", "%Y-%m-%d %H"),
        ("daily", "%Y-%m-%d"),
        ("weekly", "%G-%V"),
        ("monthly", "%Y-%m"),
        ("yearly", "%Y"),
    ]
)


def prune_split(archives, rule, n, kept_because=None):
    last = None
    keep = []
    pattern = PRUNING_PATTERNS[rule]
    if kept_because is None:
        kept_because = {}
    if n == 0:
        return keep

    a = None
    for a in sorted(archives, key=attrgetter("ts"), reverse=True):
        # we compute the pruning in local time zone
        period = a.ts.astimezone().strftime(pattern)
        if period != last:
            last = period
            if a.id not in kept_because:
                keep.append(a)
                kept_because[a.id] = (rule, len(keep))
                if len(keep) == n:
                    break
    # Keep oldest archive if we didn't reach the target retention count
    if a is not None and len(keep) < n and a.id not in kept_because:
        keep.append(a)
        kept_because[a.id] = (rule + "[oldest]", len(keep))
    return keep


class PruneMixIn:
    @with_repository(exclusive=True, compatibility=(Manifest.Operation.DELETE,))
    def do_prune(self, args, repository, manifest):
        """Prune repository archives according to specified rules"""
        if not any(
            (args.secondly, args.minutely, args.hourly, args.daily, args.weekly, args.monthly, args.yearly, args.within)
        ):
            raise CommandError(
                'At least one of the "keep-within", "keep-last", '
                '"keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", '
                '"keep-weekly", "keep-monthly" or "keep-yearly" settings must be specified.'
            )
        if args.format is not None:
            format = args.format
        elif args.short:
            format = "{archive}"
        else:
            format = os.environ.get("BORG_PRUNE_FORMAT", "{archive:<36} {time} [{id}]")
        formatter = ArchiveFormatter(format, repository, manifest, manifest.key, iec=args.iec)

        checkpoint_re = r"\.checkpoint(\.\d+)?"
        archives_checkpoints = manifest.archives.list(
            match=args.match_archives,
            consider_checkpoints=True,
            match_end=r"(%s)?\Z" % checkpoint_re,
            sort_by=["ts"],
            reverse=True,
        )
        is_checkpoint = re.compile(r"(%s)\Z" % checkpoint_re).search
        checkpoints = [arch for arch in archives_checkpoints if is_checkpoint(arch.name)]
        # keep the latest checkpoint, if there is no later non-checkpoint archive
        if archives_checkpoints and checkpoints and archives_checkpoints[0] is checkpoints[0]:
            keep_checkpoints = checkpoints[:1]
        else:
            keep_checkpoints = []
        checkpoints = set(checkpoints)
        # ignore all checkpoint archives to avoid keeping one (which is an incomplete backup)
        # that is newer than a successfully completed backup - and killing the successful backup.
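        # Illustrative example (hypothetical archive names): if the repository holds
        # "pc-03.checkpoint" (newest, left over from an interrupted run), "pc-02" and
        # "pc-01", the newest entry is a checkpoint, so it is kept via keep_checkpoints;
        # if a completed archive were newer, every checkpoint would become a pruning
        # candidate. The retention rules below only ever see the completed archives.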
        archives = [arch for arch in archives_checkpoints if arch not in checkpoints]

        keep = []
        # collect the rule responsible for the keeping of each archive in this dict
        # keys are archive ids, values are a tuple
        # (<rule name>, <how many archives were kept by this rule so far>)
        kept_because = {}

        # find archives which need to be kept because of the keep-within rule
        if args.within:
            keep += prune_within(archives, args.within, kept_because)

        # find archives which need to be kept because of the various time period rules
        for rule in PRUNING_PATTERNS.keys():
            num = getattr(args, rule, None)
            if num is not None:
                keep += prune_split(archives, rule, num, kept_because)

        to_delete = (set(archives) | checkpoints) - (set(keep) | set(keep_checkpoints))

        stats = Statistics(iec=args.iec)
        with Cache(repository, manifest, lock_wait=self.lock_wait, iec=args.iec) as cache:

            def checkpoint_func():
                manifest.write()
                repository.commit(compact=False)
                cache.commit()

            list_logger = logging.getLogger("borg.output.list")
            # set up counters for the progress display
            to_delete_len = len(to_delete)
            archives_deleted = 0
            uncommitted_deletes = 0
            pi = ProgressIndicatorPercent(total=len(to_delete), msg="Pruning archives %3.0f%%", msgid="prune")
            for archive in archives_checkpoints:
                if sig_int and sig_int.action_done():
                    break
                if archive in to_delete:
                    pi.show()
                    if args.dry_run:
                        log_message = "Would prune:"
                    else:
                        archives_deleted += 1
                        log_message = "Pruning archive (%d/%d):" % (archives_deleted, to_delete_len)
                        archive = Archive(manifest, archive.name, cache)
                        archive.delete(stats, forced=args.forced)
                        checkpointed = self.maybe_checkpoint(
                            checkpoint_func=checkpoint_func, checkpoint_interval=args.checkpoint_interval
                        )
                        uncommitted_deletes = 0 if checkpointed else (uncommitted_deletes + 1)
                else:
                    if is_checkpoint(archive.name):
                        log_message = "Keeping checkpoint archive:"
                    else:
                        log_message = "Keeping archive (rule: {rule} #{num}):".format(
                            rule=kept_because[archive.id][0], num=kept_because[archive.id][1]
                        )
                if (
                    args.output_list
                    or (args.list_pruned and archive in to_delete)
                    or (args.list_kept and archive not in to_delete)
                ):
                    list_logger.info(f"{log_message:<44} {formatter.format_item(archive, jsonline=False)}")
            pi.finish()
            if sig_int:
                # Ctrl-C / SIGINT: do not checkpoint (commit) again, we already have a checkpoint in this case.
                raise Error("Got Ctrl-C / SIGINT.")
            elif uncommitted_deletes > 0:
                checkpoint_func()
            if args.stats:
                log_multi(str(stats), logger=logging.getLogger("borg.output.stats"))

    def build_parser_prune(self, subparsers, common_parser, mid_common_parser):
        from ._common import process_epilog
        from ._common import define_archive_filters_group

        prune_epilog = process_epilog(
            """
        The prune command prunes a repository by deleting all archives not matching
        any of the specified retention options.

        Important: Repository disk space is **not** freed until you run ``borg compact``.

        This command is normally used by automated backup scripts wanting to keep a
        certain number of historic backups. This retention policy is commonly referred to as
        `GFS <https://en.wikipedia.org/wiki/Backup_rotation_scheme#Grandfather-father-son>`_
        (Grandfather-father-son) backup rotation scheme.

        Also, prune automatically removes checkpoint archives (incomplete archives left
        behind by interrupted backup runs) except if the checkpoint is the latest
        archive (and thus still needed). Checkpoint archives are not considered when
        comparing archive counts against the retention limits (``--keep-X``).

        If you use --match-archives (-a), then only archives that match the pattern are
        considered for deletion and only those archives count towards the totals
        specified by the rules.
        Otherwise, *all* archives in the repository are candidates for deletion!
        There is no automatic distinction between archives representing different
        contents. These need to be distinguished by specifying matching globs.

        If you have multiple sequences of archives with different data sets (e.g.
        from different machines) in one shared repository, use one prune call per
        data set that matches only the respective archives using the --match-archives
        (-a) option.

        The ``--keep-within`` option takes an argument of the form "<int><char>",
        where char is "H", "d", "w", "m", "y". For example, ``--keep-within 2d`` means
        to keep all archives that were created within the past 48 hours.
        "1m" is taken to mean "31d". The archives kept with this option do not
        count towards the totals specified by any other options.

        A good procedure is to thin out more and more the older your backups get.
        As an example, ``--keep-daily 7`` means to keep the latest backup on each day,
        up to 7 most recent days with backups (days without backups do not count).
        The rules are applied from secondly to yearly, and backups selected by previous
        rules do not count towards those of later rules. The time that each backup
        starts is used for pruning purposes. Dates and times are interpreted in the
        local timezone of the system where borg prune runs, and weeks go from Monday
        to Sunday. Specifying a negative number of archives to keep means that there
        is no limit.

        As of borg 1.2.0, borg will retain the oldest archive if any of the secondly,
        minutely, hourly, daily, weekly, monthly, or yearly rules was not otherwise
        able to meet its retention target. This enables the first chronological archive
        to continue aging until it is replaced by a newer archive that meets the
        retention criteria.

        The ``--keep-last N`` option does the same as ``--keep-secondly N`` (and it will
        keep the last N archives under the assumption that you do not create more than one
        backup archive in the same second).

        When using ``--stats``, you will get some statistics about how much data was deleted -
        the "Deleted data" deduplicated size there is most interesting as that is how much
        your repository will shrink.
        Please note that the "All archives" stats refer to the state after pruning.

        You can influence how the ``--list`` output is formatted by using the ``--short``
        option (less wide output) or by giving a custom format using ``--format`` (see the
        ``borg rlist`` description for more details about the format string).
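
        For example, a GFS-style rotation that keeps a week of daily, a month of weekly
        and half a year of monthly archives for one machine could look like this (the
        archive name pattern given to ``-a`` is only an illustration)::

            borg prune --list --keep-daily 7 --keep-weekly 4 --keep-monthly 6 -a 'sh:myhost-*'

        Remember to run ``borg compact`` afterwards to actually free repository space.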
""" ) subparser = subparsers.add_parser( "prune", parents=[common_parser], add_help=False, description=self.do_prune.__doc__, epilog=prune_epilog, formatter_class=argparse.RawDescriptionHelpFormatter, help="prune archives", ) subparser.set_defaults(func=self.do_prune) subparser.add_argument("-n", "--dry-run", dest="dry_run", action="store_true", help="do not change repository") subparser.add_argument( "--force", dest="forced", action="store_true", help="force pruning of corrupted archives, " "use ``--force --force`` in case ``--force`` does not work.", ) subparser.add_argument( "-s", "--stats", dest="stats", action="store_true", help="print statistics for the deleted archive" ) subparser.add_argument( "--list", dest="output_list", action="store_true", help="output verbose list of archives it keeps/prunes" ) subparser.add_argument("--short", dest="short", action="store_true", help="use a less wide archive part format") subparser.add_argument( "--list-pruned", dest="list_pruned", action="store_true", help="output verbose list of archives it prunes" ) subparser.add_argument( "--list-kept", dest="list_kept", action="store_true", help="output verbose list of archives it keeps" ) subparser.add_argument( "--format", metavar="FORMAT", dest="format", action=Highlander, help="specify format for the archive part " '(default: "{archive:<36} {time} [{id}]")', ) subparser.add_argument( "--keep-within", metavar="INTERVAL", dest="within", type=interval, action=Highlander, help="keep all archives within this time interval", ) subparser.add_argument( "--keep-last", "--keep-secondly", dest="secondly", type=int, default=0, action=Highlander, help="number of secondly archives to keep", ) subparser.add_argument( "--keep-minutely", dest="minutely", type=int, default=0, action=Highlander, help="number of minutely archives to keep", ) subparser.add_argument( "-H", "--keep-hourly", dest="hourly", type=int, default=0, action=Highlander, help="number of hourly archives to keep", ) subparser.add_argument( "-d", "--keep-daily", dest="daily", type=int, default=0, action=Highlander, help="number of daily archives to keep", ) subparser.add_argument( "-w", "--keep-weekly", dest="weekly", type=int, default=0, action=Highlander, help="number of weekly archives to keep", ) subparser.add_argument( "-m", "--keep-monthly", dest="monthly", type=int, default=0, action=Highlander, help="number of monthly archives to keep", ) subparser.add_argument( "-y", "--keep-yearly", dest="yearly", type=int, default=0, action=Highlander, help="number of yearly archives to keep", ) define_archive_filters_group(subparser, sort_by=False, first_last=False) subparser.add_argument( "-c", "--checkpoint-interval", metavar="SECONDS", dest="checkpoint_interval", type=int, default=1800, action=Highlander, help="write checkpoint every SECONDS seconds (Default: 1800)", )