diff --git a/setup.cfg b/setup.cfg index dd9230179..60975408d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -123,6 +123,7 @@ per_file_ignores = src/borg/archiver/debug.py:F405 src/borg/archiver/help.py:E501,F405 src/borg/archiver/keys.py:F405 + src/borg/archiver/prune.py:F405 src/borg/archiver/tar.py:F405 src/borg/cache.py:E127,E128,E402,E501,E722,W504 src/borg/fuse.py:E402,E501,E722,W504 diff --git a/src/borg/archiver/__init__.py b/src/borg/archiver/__init__.py index 45e3702da..5f170283c 100644 --- a/src/borg/archiver/__init__.py +++ b/src/borg/archiver/__init__.py @@ -12,7 +12,6 @@ try: import json import logging import os - import re import shlex import signal import stat @@ -43,7 +42,6 @@ try: from ..helpers import BaseFormatter, ItemFormatter, ArchiveFormatter from ..helpers import format_timedelta, format_file_size, parse_file_size, format_archive from ..helpers import remove_surrogates, bin_to_hex, eval_escapes - from ..helpers import interval, prune_within, prune_split, PRUNING_PATTERNS from ..helpers import timestamp from ..helpers import get_cache_dir, os_stat from ..helpers import Manifest @@ -105,11 +103,14 @@ from .debug import DebugMixIn from .help import HelpMixIn from .keys import KeysMixIn from .locks import LocksMixIn +from .prune import PruneMixIn from .tar import TarMixIn from .transfer import TransferMixIn -class Archiver(ConfigMixIn, DebugMixIn, TarMixIn, BenchmarkMixIn, KeysMixIn, LocksMixIn, HelpMixIn, TransferMixIn): +class Archiver( + ConfigMixIn, DebugMixIn, TarMixIn, BenchmarkMixIn, KeysMixIn, LocksMixIn, PruneMixIn, HelpMixIn, TransferMixIn +): def __init__(self, lock_wait=None, prog=None): self.exit_code = EXIT_SUCCESS self.lock_wait = lock_wait @@ -1164,93 +1165,6 @@ class Archiver(ConfigMixIn, DebugMixIn, TarMixIn, BenchmarkMixIn, KeysMixIn, Loc json_print(basic_json_data(manifest, cache=cache, extra={"archives": output_data})) return self.exit_code - @with_repository(exclusive=True, compatibility=(Manifest.Operation.DELETE,)) - def do_prune(self, args, repository, manifest, key): - """Prune repository archives according to specified rules""" - if not any( - (args.secondly, args.minutely, args.hourly, args.daily, args.weekly, args.monthly, args.yearly, args.within) - ): - self.print_error( - 'At least one of the "keep-within", "keep-last", ' - '"keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", ' - '"keep-weekly", "keep-monthly" or "keep-yearly" settings must be specified.' - ) - return self.exit_code - checkpoint_re = r"\.checkpoint(\.\d+)?" - archives_checkpoints = manifest.archives.list( - glob=args.glob_archives, - consider_checkpoints=True, - match_end=r"(%s)?\Z" % checkpoint_re, - sort_by=["ts"], - reverse=True, - ) - is_checkpoint = re.compile(r"(%s)\Z" % checkpoint_re).search - checkpoints = [arch for arch in archives_checkpoints if is_checkpoint(arch.name)] - # keep the latest checkpoint, if there is no later non-checkpoint archive - if archives_checkpoints and checkpoints and archives_checkpoints[0] is checkpoints[0]: - keep_checkpoints = checkpoints[:1] - else: - keep_checkpoints = [] - checkpoints = set(checkpoints) - # ignore all checkpoint archives to avoid keeping one (which is an incomplete backup) - # that is newer than a successfully completed backup - and killing the successful backup. - archives = [arch for arch in archives_checkpoints if arch not in checkpoints] - keep = [] - # collect the rule responsible for the keeping of each archive in this dict - # keys are archive ids, values are a tuple - # (, ) - kept_because = {} - - # find archives which need to be kept because of the keep-within rule - if args.within: - keep += prune_within(archives, args.within, kept_because) - - # find archives which need to be kept because of the various time period rules - for rule in PRUNING_PATTERNS.keys(): - num = getattr(args, rule, None) - if num is not None: - keep += prune_split(archives, rule, num, kept_because) - - to_delete = (set(archives) | checkpoints) - (set(keep) | set(keep_checkpoints)) - stats = Statistics(iec=args.iec) - with Cache(repository, key, manifest, lock_wait=self.lock_wait, iec=args.iec) as cache: - list_logger = logging.getLogger("borg.output.list") - # set up counters for the progress display - to_delete_len = len(to_delete) - archives_deleted = 0 - pi = ProgressIndicatorPercent(total=len(to_delete), msg="Pruning archives %3.0f%%", msgid="prune") - for archive in archives_checkpoints: - if archive in to_delete: - pi.show() - if args.dry_run: - log_message = "Would prune:" - else: - archives_deleted += 1 - log_message = "Pruning archive (%d/%d):" % (archives_deleted, to_delete_len) - archive = Archive( - repository, key, manifest, archive.name, cache, consider_part_files=args.consider_part_files - ) - archive.delete(stats, forced=args.forced) - else: - if is_checkpoint(archive.name): - log_message = "Keeping checkpoint archive:" - else: - log_message = "Keeping archive (rule: {rule} #{num}):".format( - rule=kept_because[archive.id][0], num=kept_because[archive.id][1] - ) - if args.output_list: - list_logger.info( - "{message:<40} {archive}".format(message=log_message, archive=format_archive(archive)) - ) - pi.finish() - if to_delete and not args.dry_run: - manifest.write() - repository.commit(compact=False, save_space=args.save_space) - cache.commit() - if args.stats: - log_multi(str(stats), logger=logging.getLogger("borg.output.stats")) - return self.exit_code - @with_repository(cache=True, exclusive=True, compatibility=(Manifest.Operation.CHECK,)) def do_recreate(self, args, repository, manifest, key, cache): """Re-create archives""" @@ -1714,6 +1628,7 @@ class Archiver(ConfigMixIn, DebugMixIn, TarMixIn, BenchmarkMixIn, KeysMixIn, Loc self.build_parser_benchmarks(subparsers, common_parser, mid_common_parser) self.build_parser_locks(subparsers, common_parser, mid_common_parser) + self.build_parser_prune(subparsers, common_parser, mid_common_parser) # borg check check_epilog = process_epilog( @@ -2888,125 +2803,6 @@ class Archiver(ConfigMixIn, DebugMixIn, TarMixIn, BenchmarkMixIn, KeysMixIn, Loc ) define_borg_mount(subparser) - # borg prune - prune_epilog = process_epilog( - """ - The prune command prunes a repository by deleting all archives not matching - any of the specified retention options. - - Important: Repository disk space is **not** freed until you run ``borg compact``. - - This command is normally used by automated backup scripts wanting to keep a - certain number of historic backups. This retention policy is commonly referred to as - `GFS `_ - (Grandfather-father-son) backup rotation scheme. - - Also, prune automatically removes checkpoint archives (incomplete archives left - behind by interrupted backup runs) except if the checkpoint is the latest - archive (and thus still needed). Checkpoint archives are not considered when - comparing archive counts against the retention limits (``--keep-X``). - - If a prefix is set with -P, then only archives that start with the prefix are - considered for deletion and only those archives count towards the totals - specified by the rules. - Otherwise, *all* archives in the repository are candidates for deletion! - There is no automatic distinction between archives representing different - contents. These need to be distinguished by specifying matching prefixes. - - If you have multiple sequences of archives with different data sets (e.g. - from different machines) in one shared repository, use one prune call per - data set that matches only the respective archives using the -P option. - - The ``--keep-within`` option takes an argument of the form "", - where char is "H", "d", "w", "m", "y". For example, ``--keep-within 2d`` means - to keep all archives that were created within the past 48 hours. - "1m" is taken to mean "31d". The archives kept with this option do not - count towards the totals specified by any other options. - - A good procedure is to thin out more and more the older your backups get. - As an example, ``--keep-daily 7`` means to keep the latest backup on each day, - up to 7 most recent days with backups (days without backups do not count). - The rules are applied from secondly to yearly, and backups selected by previous - rules do not count towards those of later rules. The time that each backup - starts is used for pruning purposes. Dates and times are interpreted in - the local timezone, and weeks go from Monday to Sunday. Specifying a - negative number of archives to keep means that there is no limit. As of borg - 1.2.0, borg will retain the oldest archive if any of the secondly, minutely, - hourly, daily, weekly, monthly, or yearly rules was not otherwise able to meet - its retention target. This enables the first chronological archive to continue - aging until it is replaced by a newer archive that meets the retention criteria. - - The ``--keep-last N`` option is doing the same as ``--keep-secondly N`` (and it will - keep the last N archives under the assumption that you do not create more than one - backup archive in the same second). - - When using ``--stats``, you will get some statistics about how much data was - deleted - the "Deleted data" deduplicated size there is most interesting as - that is how much your repository will shrink. - Please note that the "All archives" stats refer to the state after pruning. - """ - ) - subparser = subparsers.add_parser( - "prune", - parents=[common_parser], - add_help=False, - description=self.do_prune.__doc__, - epilog=prune_epilog, - formatter_class=argparse.RawDescriptionHelpFormatter, - help="prune archives", - ) - subparser.set_defaults(func=self.do_prune) - subparser.add_argument("-n", "--dry-run", dest="dry_run", action="store_true", help="do not change repository") - subparser.add_argument( - "--force", - dest="forced", - action="store_true", - help="force pruning of corrupted archives, " "use ``--force --force`` in case ``--force`` does not work.", - ) - subparser.add_argument( - "-s", "--stats", dest="stats", action="store_true", help="print statistics for the deleted archive" - ) - subparser.add_argument( - "--list", dest="output_list", action="store_true", help="output verbose list of archives it keeps/prunes" - ) - subparser.add_argument( - "--keep-within", - metavar="INTERVAL", - dest="within", - type=interval, - help="keep all archives within this time interval", - ) - subparser.add_argument( - "--keep-last", - "--keep-secondly", - dest="secondly", - type=int, - default=0, - help="number of secondly archives to keep", - ) - subparser.add_argument( - "--keep-minutely", dest="minutely", type=int, default=0, help="number of minutely archives to keep" - ) - subparser.add_argument( - "-H", "--keep-hourly", dest="hourly", type=int, default=0, help="number of hourly archives to keep" - ) - subparser.add_argument( - "-d", "--keep-daily", dest="daily", type=int, default=0, help="number of daily archives to keep" - ) - subparser.add_argument( - "-w", "--keep-weekly", dest="weekly", type=int, default=0, help="number of weekly archives to keep" - ) - subparser.add_argument( - "-m", "--keep-monthly", dest="monthly", type=int, default=0, help="number of monthly archives to keep" - ) - subparser.add_argument( - "-y", "--keep-yearly", dest="yearly", type=int, default=0, help="number of yearly archives to keep" - ) - define_archive_filters_group(subparser, sort_by=False, first_last=False) - subparser.add_argument( - "--save-space", dest="save_space", action="store_true", help="work slower, but using less space" - ) - # borg recreate recreate_epilog = process_epilog( """ diff --git a/src/borg/archiver/prune.py b/src/borg/archiver/prune.py new file mode 100644 index 000000000..1015f4d96 --- /dev/null +++ b/src/borg/archiver/prune.py @@ -0,0 +1,229 @@ +import argparse +import logging +import re + +from .common import with_repository +from ..archive import Archive, Statistics +from ..cache import Cache +from ..constants import * # NOQA +from ..helpers import format_archive +from ..helpers import interval, prune_within, prune_split, PRUNING_PATTERNS +from ..helpers import Manifest +from ..helpers import log_multi +from ..helpers import ProgressIndicatorPercent + +from ..logger import create_logger + +logger = create_logger() + + +class PruneMixIn: + @with_repository(exclusive=True, compatibility=(Manifest.Operation.DELETE,)) + def do_prune(self, args, repository, manifest, key): + """Prune repository archives according to specified rules""" + if not any( + (args.secondly, args.minutely, args.hourly, args.daily, args.weekly, args.monthly, args.yearly, args.within) + ): + self.print_error( + 'At least one of the "keep-within", "keep-last", ' + '"keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", ' + '"keep-weekly", "keep-monthly" or "keep-yearly" settings must be specified.' + ) + return self.exit_code + checkpoint_re = r"\.checkpoint(\.\d+)?" + archives_checkpoints = manifest.archives.list( + glob=args.glob_archives, + consider_checkpoints=True, + match_end=r"(%s)?\Z" % checkpoint_re, + sort_by=["ts"], + reverse=True, + ) + is_checkpoint = re.compile(r"(%s)\Z" % checkpoint_re).search + checkpoints = [arch for arch in archives_checkpoints if is_checkpoint(arch.name)] + # keep the latest checkpoint, if there is no later non-checkpoint archive + if archives_checkpoints and checkpoints and archives_checkpoints[0] is checkpoints[0]: + keep_checkpoints = checkpoints[:1] + else: + keep_checkpoints = [] + checkpoints = set(checkpoints) + # ignore all checkpoint archives to avoid keeping one (which is an incomplete backup) + # that is newer than a successfully completed backup - and killing the successful backup. + archives = [arch for arch in archives_checkpoints if arch not in checkpoints] + keep = [] + # collect the rule responsible for the keeping of each archive in this dict + # keys are archive ids, values are a tuple + # (, ) + kept_because = {} + + # find archives which need to be kept because of the keep-within rule + if args.within: + keep += prune_within(archives, args.within, kept_because) + + # find archives which need to be kept because of the various time period rules + for rule in PRUNING_PATTERNS.keys(): + num = getattr(args, rule, None) + if num is not None: + keep += prune_split(archives, rule, num, kept_because) + + to_delete = (set(archives) | checkpoints) - (set(keep) | set(keep_checkpoints)) + stats = Statistics(iec=args.iec) + with Cache(repository, key, manifest, lock_wait=self.lock_wait, iec=args.iec) as cache: + list_logger = logging.getLogger("borg.output.list") + # set up counters for the progress display + to_delete_len = len(to_delete) + archives_deleted = 0 + pi = ProgressIndicatorPercent(total=len(to_delete), msg="Pruning archives %3.0f%%", msgid="prune") + for archive in archives_checkpoints: + if archive in to_delete: + pi.show() + if args.dry_run: + log_message = "Would prune:" + else: + archives_deleted += 1 + log_message = "Pruning archive (%d/%d):" % (archives_deleted, to_delete_len) + archive = Archive( + repository, key, manifest, archive.name, cache, consider_part_files=args.consider_part_files + ) + archive.delete(stats, forced=args.forced) + else: + if is_checkpoint(archive.name): + log_message = "Keeping checkpoint archive:" + else: + log_message = "Keeping archive (rule: {rule} #{num}):".format( + rule=kept_because[archive.id][0], num=kept_because[archive.id][1] + ) + if args.output_list: + list_logger.info( + "{message:<40} {archive}".format(message=log_message, archive=format_archive(archive)) + ) + pi.finish() + if to_delete and not args.dry_run: + manifest.write() + repository.commit(compact=False, save_space=args.save_space) + cache.commit() + if args.stats: + log_multi(str(stats), logger=logging.getLogger("borg.output.stats")) + return self.exit_code + + def build_parser_prune(self, subparsers, common_parser, mid_common_parser): + + from .common import process_epilog + from .common import define_archive_filters_group + + prune_epilog = process_epilog( + """ + The prune command prunes a repository by deleting all archives not matching + any of the specified retention options. + + Important: Repository disk space is **not** freed until you run ``borg compact``. + + This command is normally used by automated backup scripts wanting to keep a + certain number of historic backups. This retention policy is commonly referred to as + `GFS `_ + (Grandfather-father-son) backup rotation scheme. + + Also, prune automatically removes checkpoint archives (incomplete archives left + behind by interrupted backup runs) except if the checkpoint is the latest + archive (and thus still needed). Checkpoint archives are not considered when + comparing archive counts against the retention limits (``--keep-X``). + + If a prefix is set with -P, then only archives that start with the prefix are + considered for deletion and only those archives count towards the totals + specified by the rules. + Otherwise, *all* archives in the repository are candidates for deletion! + There is no automatic distinction between archives representing different + contents. These need to be distinguished by specifying matching prefixes. + + If you have multiple sequences of archives with different data sets (e.g. + from different machines) in one shared repository, use one prune call per + data set that matches only the respective archives using the -P option. + + The ``--keep-within`` option takes an argument of the form "", + where char is "H", "d", "w", "m", "y". For example, ``--keep-within 2d`` means + to keep all archives that were created within the past 48 hours. + "1m" is taken to mean "31d". The archives kept with this option do not + count towards the totals specified by any other options. + + A good procedure is to thin out more and more the older your backups get. + As an example, ``--keep-daily 7`` means to keep the latest backup on each day, + up to 7 most recent days with backups (days without backups do not count). + The rules are applied from secondly to yearly, and backups selected by previous + rules do not count towards those of later rules. The time that each backup + starts is used for pruning purposes. Dates and times are interpreted in + the local timezone, and weeks go from Monday to Sunday. Specifying a + negative number of archives to keep means that there is no limit. As of borg + 1.2.0, borg will retain the oldest archive if any of the secondly, minutely, + hourly, daily, weekly, monthly, or yearly rules was not otherwise able to meet + its retention target. This enables the first chronological archive to continue + aging until it is replaced by a newer archive that meets the retention criteria. + + The ``--keep-last N`` option is doing the same as ``--keep-secondly N`` (and it will + keep the last N archives under the assumption that you do not create more than one + backup archive in the same second). + + When using ``--stats``, you will get some statistics about how much data was + deleted - the "Deleted data" deduplicated size there is most interesting as + that is how much your repository will shrink. + Please note that the "All archives" stats refer to the state after pruning. + """ + ) + subparser = subparsers.add_parser( + "prune", + parents=[common_parser], + add_help=False, + description=self.do_prune.__doc__, + epilog=prune_epilog, + formatter_class=argparse.RawDescriptionHelpFormatter, + help="prune archives", + ) + subparser.set_defaults(func=self.do_prune) + subparser.add_argument("-n", "--dry-run", dest="dry_run", action="store_true", help="do not change repository") + subparser.add_argument( + "--force", + dest="forced", + action="store_true", + help="force pruning of corrupted archives, " "use ``--force --force`` in case ``--force`` does not work.", + ) + subparser.add_argument( + "-s", "--stats", dest="stats", action="store_true", help="print statistics for the deleted archive" + ) + subparser.add_argument( + "--list", dest="output_list", action="store_true", help="output verbose list of archives it keeps/prunes" + ) + subparser.add_argument( + "--keep-within", + metavar="INTERVAL", + dest="within", + type=interval, + help="keep all archives within this time interval", + ) + subparser.add_argument( + "--keep-last", + "--keep-secondly", + dest="secondly", + type=int, + default=0, + help="number of secondly archives to keep", + ) + subparser.add_argument( + "--keep-minutely", dest="minutely", type=int, default=0, help="number of minutely archives to keep" + ) + subparser.add_argument( + "-H", "--keep-hourly", dest="hourly", type=int, default=0, help="number of hourly archives to keep" + ) + subparser.add_argument( + "-d", "--keep-daily", dest="daily", type=int, default=0, help="number of daily archives to keep" + ) + subparser.add_argument( + "-w", "--keep-weekly", dest="weekly", type=int, default=0, help="number of weekly archives to keep" + ) + subparser.add_argument( + "-m", "--keep-monthly", dest="monthly", type=int, default=0, help="number of monthly archives to keep" + ) + subparser.add_argument( + "-y", "--keep-yearly", dest="yearly", type=int, default=0, help="number of yearly archives to keep" + ) + define_archive_filters_group(subparser, sort_by=False, first_last=False) + subparser.add_argument( + "--save-space", dest="save_space", action="store_true", help="work slower, but using less space" + )