diff --git a/src/borg/archive.py b/src/borg/archive.py
index 24411f15b..c07cec08c 100644
--- a/src/borg/archive.py
+++ b/src/borg/archive.py
@@ -458,6 +458,7 @@ def __init__(
         end=None,
         log_json=False,
         iec=False,
+        deleted=False,
     ):
         name_is_id = isinstance(name, bytes)
         self.cwd = os.getcwd()
@@ -499,8 +500,9 @@ def __init__(
             self.tags = set()
         else:
             if name_is_id:
-                # we also go over the manifest here to avoid quick&dirty deleted archives
-                info = self.manifest.archives.get_by_id(name)
+                # we also go over the manifest here to avoid soft-deleted archives,
+                # unless we explicitly request one via deleted=True.
+                info = self.manifest.archives.get_by_id(name, deleted=deleted)
             else:
                 info = self.manifest.archives.get(name)
             if info is None:
@@ -1633,7 +1635,7 @@ def check(
         *,
         verify_data=False,
         repair=False,
-        undelete_archives=False,
+        find_lost_archives=False,
         match=None,
         sort_by="",
         first=0,
@@ -1646,7 +1648,7 @@ def check(
         """Perform a set of checks on 'repository'

         :param repair: enable repair mode, write updated or corrected data into repository
-        :param undelete_archives: create archive directory entries that are missing
+        :param find_lost_archives: create archive directory entries that are missing
         :param first/last/sort_by: only check this number of first/last archives ordered by sort_by
         :param match: only check archives matching this pattern
         :param older/newer: only check archives older/newer than timedelta from now
@@ -1683,7 +1685,7 @@ def check(
             rebuild_manifest = True
         if rebuild_manifest:
             self.manifest = self.rebuild_manifest()
-        if undelete_archives:
+        if find_lost_archives:
             self.rebuild_archives_directory()
         self.rebuild_archives(
             match=match, first=first, last=last, sort_by=sort_by, older=older, oldest=oldest, newer=newer, newest=newest
@@ -1813,8 +1815,10 @@ def rebuild_archives_directory(self):
         """Rebuild the archives directory, undeleting archives.

         Iterates through all objects in the repository looking for archive metadata blocks.
-        When finding some that do not have a corresponding archives directory entry, it will
-        create that entry (undeleting all archives).
+        When it finds some that do not have a corresponding archives directory entry (either
+        a normal entry for an "existing" archive, or a soft-deleted entry for a "deleted"
+        archive), it will create that entry (making the archives directory consistent with
+        the repository).
""" def valid_archive(obj): @@ -1831,6 +1835,16 @@ def valid_archive(obj): ) for chunk_id, _ in self.chunks.iteritems(): pi.show() + cdata = self.repository.get(chunk_id, read_data=False) # only get metadata + try: + meta = self.repo_objs.parse_meta(chunk_id, cdata, ro_type=ROBJ_DONTCARE) + except IntegrityErrorBase as exc: + logger.error("Skipping corrupted chunk: %s", exc) + self.error_found = True + continue + if meta["type"] != ROBJ_ARCHIVE_META: + continue + # now we know it is an archive metadata chunk, load the full object from the repo: cdata = self.repository.get(chunk_id) try: meta, data = self.repo_objs.parse(chunk_id, cdata, ro_type=ROBJ_DONTCARE) @@ -1839,7 +1853,7 @@ def valid_archive(obj): self.error_found = True continue if meta["type"] != ROBJ_ARCHIVE_META: - continue + continue # should never happen try: archive = msgpack.unpackb(data) # Ignore exceptions that might be raised when feeding msgpack with invalid data @@ -1850,12 +1864,18 @@ def valid_archive(obj): archive = ArchiveItem(internal_dict=archive) name = archive.name archive_id, archive_id_hex = chunk_id, bin_to_hex(chunk_id) - logger.info(f"Found archive {name} {archive_id_hex}.") - if self.manifest.archives.exists_name_and_id(name, archive_id): - logger.info("We already have an archives directory entry for this.") + if self.manifest.archives.exists_id(archive_id, deleted=False): + logger.debug(f"We already have an archives directory entry for {name} {archive_id_hex}.") + elif self.manifest.archives.exists_id(archive_id, deleted=True): + logger.debug(f"We already have a deleted archives directory entry for {name} {archive_id_hex}.") else: - logger.warning(f"Creating archives directory entry for {name} {archive_id_hex}.") - self.manifest.archives.create(name, archive_id, archive.time) + self.error_found = True + if self.repair: + logger.warning(f"Creating archives directory entry for {name} {archive_id_hex}.") + self.manifest.archives.create(name, archive_id, archive.time) + else: + logger.warning(f"Would create archives directory entry for {name} {archive_id_hex}.") + pi.finish() logger.info("Rebuilding missing archives directory entries completed.") diff --git a/src/borg/archiver/__init__.py b/src/borg/archiver/__init__.py index add009bea..f8939f894 100644 --- a/src/borg/archiver/__init__.py +++ b/src/borg/archiver/__init__.py @@ -92,6 +92,7 @@ def get_func(args): from .tag_cmd import TagMixIn from .tar_cmds import TarMixIn from .transfer_cmd import TransferMixIn +from .undelete_cmd import UnDeleteMixIn from .version_cmd import VersionMixIn @@ -124,6 +125,7 @@ class Archiver( TagMixIn, TarMixIn, TransferMixIn, + UnDeleteMixIn, VersionMixIn, ): def __init__(self, lock_wait=None, prog=None): @@ -364,6 +366,7 @@ def build_parser(self): self.build_parser_tag(subparsers, common_parser, mid_common_parser) self.build_parser_tar(subparsers, common_parser, mid_common_parser) self.build_parser_transfer(subparsers, common_parser, mid_common_parser) + self.build_parser_undelete(subparsers, common_parser, mid_common_parser) self.build_parser_version(subparsers, common_parser, mid_common_parser) return parser diff --git a/src/borg/archiver/_common.py b/src/borg/archiver/_common.py index f8223eb73..068a1fba5 100644 --- a/src/borg/archiver/_common.py +++ b/src/borg/archiver/_common.py @@ -369,7 +369,9 @@ def define_exclusion_group(subparser, **kwargs): return exclude_group -def define_archive_filters_group(subparser, *, sort_by=True, first_last=True, oldest_newest=True, older_newer=True): +def define_archive_filters_group( + 
diff --git a/src/borg/archiver/__init__.py b/src/borg/archiver/__init__.py
index add009bea..f8939f894 100644
--- a/src/borg/archiver/__init__.py
+++ b/src/borg/archiver/__init__.py
@@ -92,6 +92,7 @@ def get_func(args):
 from .tag_cmd import TagMixIn
 from .tar_cmds import TarMixIn
 from .transfer_cmd import TransferMixIn
+from .undelete_cmd import UnDeleteMixIn
 from .version_cmd import VersionMixIn
@@ -124,6 +125,7 @@ class Archiver(
     TagMixIn,
     TarMixIn,
     TransferMixIn,
+    UnDeleteMixIn,
     VersionMixIn,
 ):
     def __init__(self, lock_wait=None, prog=None):
@@ -364,6 +366,7 @@ def build_parser(self):
         self.build_parser_tag(subparsers, common_parser, mid_common_parser)
         self.build_parser_tar(subparsers, common_parser, mid_common_parser)
         self.build_parser_transfer(subparsers, common_parser, mid_common_parser)
+        self.build_parser_undelete(subparsers, common_parser, mid_common_parser)
         self.build_parser_version(subparsers, common_parser, mid_common_parser)

         return parser
diff --git a/src/borg/archiver/_common.py b/src/borg/archiver/_common.py
index f8223eb73..068a1fba5 100644
--- a/src/borg/archiver/_common.py
+++ b/src/borg/archiver/_common.py
@@ -369,7 +369,9 @@ def define_exclusion_group(subparser, **kwargs):
     return exclude_group


-def define_archive_filters_group(subparser, *, sort_by=True, first_last=True, oldest_newest=True, older_newer=True):
+def define_archive_filters_group(
+    subparser, *, sort_by=True, first_last=True, oldest_newest=True, older_newer=True, deleted=False
+):
     filters_group = subparser.add_argument_group(
         "Archive filters", "Archive filters can be applied to repository targets."
     )
@@ -456,6 +458,11 @@ def define_archive_filters_group(subparser, *, sort_by=True, first_last=True, ol
             help="consider archives newer than (now - TIMESPAN), e.g. 7d or 12m.",
         )

+    if deleted:
+        filters_group.add_argument(
+            "--deleted", dest="deleted", action="store_true", help="consider only deleted archives."
+        )
+
     return filters_group
diff --git a/src/borg/archiver/check_cmd.py b/src/borg/archiver/check_cmd.py
index a7d0ea990..a5b5bf09f 100644
--- a/src/borg/archiver/check_cmd.py
+++ b/src/borg/archiver/check_cmd.py
@@ -35,10 +35,10 @@ def do_check(self, args, repository):
             raise CommandError(
                 "--repository-only contradicts --first, --last, -a / --match-archives and --verify-data arguments."
             )
+        if args.repo_only and args.find_lost_archives:
+            raise CommandError("--repository-only contradicts the --find-lost-archives option.")
         if args.repair and args.max_duration:
             raise CommandError("--repair does not allow --max-duration argument.")
-        if args.undelete_archives and not args.repair:
-            raise CommandError("--undelete-archives requires --repair argument.")
         if args.max_duration and not args.repo_only:
             # when doing a partial repo check, we can only check xxh64 hashes in repository files.
             # archives check requires that a full repo check was done before and has built/cached a ChunkIndex.
@@ -51,7 +51,7 @@ def do_check(self, args, repository):
                 repository,
                 verify_data=args.verify_data,
                 repair=args.repair,
-                undelete_archives=args.undelete_archives,
+                find_lost_archives=args.find_lost_archives,
                 match=args.match_archives,
                 sort_by=args.sort_by or "ts",
                 first=args.first,
@@ -85,12 +85,14 @@ def build_parser_check(self, subparsers, common_parser, mid_common_parser):
         archive data (requires ``--verify-data``). This includes ensuring that the
         repository manifest exists, the archive metadata chunk is present, and that
         all chunks referencing files (items) in the archive exist. This requires
-        reading archive and file metadata, but not data. To cryptographically verify
-        the file (content) data integrity pass ``--verify-data``, but keep in mind
-        that this requires reading all data and is hence very time consuming. When
-        checking archives of a remote repository, archive checks run on the client
-        machine because they require decrypting data and therefore the encryption
-        key.
+        reading archive and file metadata, but not data. To scan for archives whose
+        entries were lost from the archive directory, pass ``--find-lost-archives``.
+        It requires reading all data and is hence very time consuming.
+        To additionally cryptographically verify the file (content) data integrity,
+        pass ``--verify-data``, which is even more time consuming.
+
+        When checking archives of a remote repository, archive checks run on the client
+        machine because they require decrypting data and therefore the encryption key.

         Both steps can also be run independently. Pass ``--repository-only`` to run the
         repository checks only, or pass ``--archives-only`` to run the archive checks
@@ -122,6 +124,15 @@ def build_parser_check(self, subparsers, common_parser, mid_common_parser):
         encrypted repositories against attackers without access to the keys. You can
         not use ``--verify-data`` with ``--repository-only``.
+        The ``--find-lost-archives`` option will also scan the whole repository, but
+        tells Borg to search for lost archive metadata. If Borg encounters any archive
+        metadata that doesn't match an archive directory entry, it means that an
+        entry was lost.
+        As long as ``borg compact`` has not been run, these archives can be fully
+        restored with ``--repair``. Please note that ``--find-lost-archives`` must read
+        a lot of data from the repository and is thus very time consuming. You can not
+        use ``--find-lost-archives`` with ``--repository-only``.
+
         About repair mode
         +++++++++++++++++
@@ -180,10 +191,8 @@ def build_parser_check(self, subparsers, common_parser, mid_common_parser):
         Consequently, if lost chunks were repaired earlier, it is advised to run
         ``--repair`` a second time after creating some new backups.

-        If ``--repair --undelete-archives`` is given, Borg will scan the repository
-        for archive metadata and if it finds some where no corresponding archives
-        directory entry exists, it will create the entries. This is basically undoing
-        ``borg delete archive`` or ``borg prune ...`` commands and only possible before
+        If ``--repair --find-lost-archives`` is given, previously lost entries will
+        be recreated in the archive directory. This is only possible before
         ``borg compact`` would remove the archives' data completely.
         """
         )
@@ -213,10 +222,7 @@ def build_parser_check(self, subparsers, common_parser, mid_common_parser):
             "--repair", dest="repair", action="store_true", help="attempt to repair any inconsistencies found"
         )
         subparser.add_argument(
-            "--undelete-archives",
-            dest="undelete_archives",
-            action="store_true",
-            help="attempt to undelete archives (use with --repair)",
+            "--find-lost-archives", dest="find_lost_archives", action="store_true", help="attempt to find lost archives"
         )
         subparser.add_argument(
             "--max-duration",
diff --git a/src/borg/archiver/compact_cmd.py b/src/borg/archiver/compact_cmd.py
index 20c9fa480..310296fdb 100644
--- a/src/borg/archiver/compact_cmd.py
+++ b/src/borg/archiver/compact_cmd.py
@@ -127,6 +127,15 @@ def report_and_delete(self):
             logger.warning(f"{len(self.reappeared_chunks)} previously missing objects re-appeared!" + run_repair)
             set_ec(EXIT_WARNING)

+        logger.info("Removing soft-deleted archives from the archives directory...")
+        archive_infos = self.manifest.archives.list(sort_by=["ts"], deleted=True)
+        for archive_info in archive_infos:
+            name, id, hex_id = archive_info.name, archive_info.id, bin_to_hex(archive_info.id)
+            try:
+                self.manifest.archives.nuke_by_id(id)
+            except KeyError:
+                self.print_warning(f"Archive {name} {hex_id} not found.")
+
         repo_size_before = self.repository_size
         logger.info("Determining unused objects...")
         unused = set()
@@ -166,9 +175,33 @@ def build_parser_compact(self, subparsers, common_parser, mid_common_parser):
         """
         Free repository space by deleting unused chunks.

-        borg compact analyzes all existing archives to find out which chunks are
-        actually used. There might be unused chunks resulting from borg delete or prune,
-        which can be removed to free space in the repository.
+        borg compact analyzes all existing archives to find out which repository
+        objects are actually used (referenced). It then removes all unused objects
+        to free repository space.
+
+        Unused objects may result from:
+
+        - borg delete or prune usage
+        - interrupted backups (maybe retry the backup first before running compact!)
+        - backups of source files that were skipped due to an I/O error in the
+          middle of their contents
+        - corruption of the repository (e.g. the archives directory having lost entries)
+
+        You usually don't want to run ``borg compact`` after every write operation, but
+        either regularly (e.g. once a month, possibly together with ``borg check``) or
+        when disk space needs to be freed.
+
+        **Important:**
+
+        After compacting, it is no longer possible to use ``borg undelete`` to recover
+        previously deleted archives.
+
+        ``borg compact`` might also delete data from archives that were "lost" due to
+        archives directory corruption. Such archives could potentially be restored with
+        ``borg check --find-lost-archives [--repair]``, which is slow; thus, you usually
+        only want to do that when there are signs of lost archives (e.g. fatal errors
+        when creating backups, or archives missing from ``borg list``).

         Differently than borg 1.x, borg2's compact needs the borg key if the repo is
         encrypted.
diff --git a/src/borg/archiver/delete_cmd.py b/src/borg/archiver/delete_cmd.py
index 5ff05d33d..ee411428b 100644
--- a/src/borg/archiver/delete_cmd.py
+++ b/src/borg/archiver/delete_cmd.py
@@ -64,8 +64,11 @@ def build_parser_delete(self, subparsers, common_parser, mid_common_parser):
         """
         This command deletes archives from the repository.

-        Important: When deleting archives, repository disk space is **not** freed until
-        you run ``borg compact``.
+        Important:
+
+        - Repository disk space is **not** freed until you run ``borg compact``.
+        - You can use ``borg undelete`` to undelete archives, but only until
+          you run ``borg compact``.

         When in doubt, use ``--dry-run --list`` to see what would be deleted.
diff --git a/src/borg/archiver/prune_cmd.py b/src/borg/archiver/prune_cmd.py
index b970bc36e..654d89fcc 100644
--- a/src/borg/archiver/prune_cmd.py
+++ b/src/borg/archiver/prune_cmd.py
@@ -215,7 +215,11 @@ def build_parser_prune(self, subparsers, common_parser, mid_common_parser):
         The prune command prunes a repository by deleting all archives not matching
         any of the specified retention options.

-        Important: Repository disk space is **not** freed until you run ``borg compact``.
+        Important:
+
+        - Repository disk space is **not** freed until you run ``borg compact``.
+        - You can use ``borg undelete`` to undelete archives, but only until
+          you run ``borg compact``.

         This command is normally used by automated backup scripts wanting to keep a
         certain number of historic backups. This retention policy is commonly referred to as
diff --git a/src/borg/archiver/repo_list_cmd.py b/src/borg/archiver/repo_list_cmd.py
index 752b706f2..fbdb327d3 100644
--- a/src/borg/archiver/repo_list_cmd.py
+++ b/src/borg/archiver/repo_list_cmd.py
@@ -26,7 +26,7 @@ def do_repo_list(self, args, repository, manifest):
             "BORG_RLIST_FORMAT",
             "{id:.8} {time} {archive:<15} {tags:<10} {username:<10} {hostname:<10} {comment:.40}{NL}",
         )
-        formatter = ArchiveFormatter(format, repository, manifest, manifest.key, iec=args.iec)
+        formatter = ArchiveFormatter(format, repository, manifest, manifest.key, iec=args.iec, deleted=args.deleted)

         output_data = []

@@ -113,4 +113,4 @@ def build_parser_repo_list(self, subparsers, common_parser, mid_common_parser):
             "but keys used in it are added to the JSON output. "
             "Some keys are always present. Note: JSON can only represent text.",
         )
-        define_archive_filters_group(subparser)
+        define_archive_filters_group(subparser, deleted=True)
diff --git a/src/borg/archiver/undelete_cmd.py b/src/borg/archiver/undelete_cmd.py
new file mode 100644
index 000000000..31e38ca51
--- /dev/null
+++ b/src/borg/archiver/undelete_cmd.py
@@ -0,0 +1,90 @@
+import argparse
+import logging
+
+from ._common import with_repository
+from ..constants import *  # NOQA
+from ..helpers import format_archive, CommandError, bin_to_hex, archivename_validator
+from ..manifest import Manifest
+
+from ..logger import create_logger
+
+logger = create_logger()
+
+
+class UnDeleteMixIn:
+    @with_repository(manifest=False)
+    def do_undelete(self, args, repository):
+        """Undelete archives"""
+        self.output_list = args.output_list
+        dry_run = args.dry_run
+        manifest = Manifest.load(repository, (Manifest.Operation.DELETE,))
+        if args.name:
+            archive_infos = [manifest.archives.get_one([args.name], deleted=True)]
+        else:
+            args.deleted = True
+            archive_infos = manifest.archives.list_considering(args)
+        count = len(archive_infos)
+        if count == 0:
+            return
+        if not args.name and not args.match_archives and args.first == 0 and args.last == 0:
+            raise CommandError("Aborting: if you really want to undelete all archives, please use -a 'sh:*'.")
+
+        undeleted = False
+        logger_list = logging.getLogger("borg.output.list")
+        for i, archive_info in enumerate(archive_infos, 1):
+            name, id, hex_id = archive_info.name, archive_info.id, bin_to_hex(archive_info.id)
+            try:
+                if not dry_run:
+                    manifest.archives.undelete_by_id(id)
+            except KeyError:
+                self.print_warning(f"Archive {name} {hex_id} not found ({i}/{count}).")
+            else:
+                undeleted = True
+                if self.output_list:
+                    msg = "Would undelete: {} ({}/{})" if dry_run else "Undeleted archive: {} ({}/{})"
+                    logger_list.info(msg.format(format_archive(archive_info), i, count))
+        if dry_run:
+            logger.info("Finished dry-run.")
+        elif undeleted:
+            manifest.write()
+            self.print_warning("Done.", wc=None)
+        else:
+            self.print_warning("Aborted.", wc=None)
+        return
+
+    def build_parser_undelete(self, subparsers, common_parser, mid_common_parser):
+        from ._common import process_epilog, define_archive_filters_group
+
+        undelete_epilog = process_epilog(
+            """
+        This command undeletes archives in the repository.
+
+        Important: Undeleting archives is only possible before compacting.
+        Once ``borg compact`` has run, all disk space occupied only by the
+        deleted archives will be freed and undelete is not possible anymore.
+
+        When in doubt, use ``--dry-run --list`` to see what would be undeleted.
+
+        You can undelete multiple archives by specifying a matching pattern,
+        using the ``--match-archives PATTERN`` option (for more info on these patterns,
+        see :ref:`borg_patterns`).
+ """ + ) + subparser = subparsers.add_parser( + "undelete", + parents=[common_parser], + add_help=False, + description=self.do_undelete.__doc__, + epilog=undelete_epilog, + formatter_class=argparse.RawDescriptionHelpFormatter, + help="undelete archive", + ) + subparser.set_defaults(func=self.do_undelete) + subparser.add_argument("-n", "--dry-run", dest="dry_run", action="store_true", help="do not change repository") + subparser.add_argument( + "--list", dest="output_list", action="store_true", help="output verbose list of archives" + ) + define_archive_filters_group(subparser) + subparser.add_argument( + "name", metavar="NAME", nargs="?", type=archivename_validator, help="specify the archive name" + ) diff --git a/src/borg/helpers/parseformat.py b/src/borg/helpers/parseformat.py index f995c8467..0f3f397da 100644 --- a/src/borg/helpers/parseformat.py +++ b/src/borg/helpers/parseformat.py @@ -718,7 +718,7 @@ class ArchiveFormatter(BaseFormatter): ("size", "nfiles"), ) - def __init__(self, format, repository, manifest, key, *, iec=False): + def __init__(self, format, repository, manifest, key, *, iec=False, deleted=False): static_data = {} # here could be stuff on repo level, above archive level static_data.update(self.FIXED_KEYS) super().__init__(format, static_data) @@ -728,6 +728,7 @@ def __init__(self, format, repository, manifest, key, *, iec=False): self.name = None self.id = None self._archive = None + self.deleted = deleted # True if we want to deal with deleted archives. self.iec = iec self.format_keys = {f[1] for f in Formatter().parse(format)} self.call_keys = { @@ -772,7 +773,7 @@ def archive(self): if self._archive is None or self._archive.id != self.id: from ..archive import Archive - self._archive = Archive(self.manifest, self.id, iec=self.iec) + self._archive = Archive(self.manifest, self.id, iec=self.iec, deleted=self.deleted) return self._archive def get_meta(self, key, default=None): diff --git a/src/borg/manifest.py b/src/borg/manifest.py index 9586b5895..608bfcaab 100644 --- a/src/borg/manifest.py +++ b/src/borg/manifest.py @@ -101,11 +101,17 @@ def finish(self, manifest): manifest_archives = StableDict(self._get_raw_dict()) return manifest_archives - def ids(self): + def ids(self, *, deleted=False): # yield the binary IDs of all archives if not self.legacy: try: - infos = list(self.repository.store_list("archives")) + infos = list(self.repository.store_list("archives", deleted=deleted)) + if deleted: + # hack: store_list(deleted=True) yields deleted AND not deleted items, + # guess this should be fixed in a future borgstore release. 
+                    # for now, we remove the not-deleted archives here:
+                    not_deleted_infos = set(self.repository.store_list("archives", deleted=False))
+                    infos = [info for info in infos if info not in not_deleted_infos]
             except ObjectNotFound:
                 infos = []
             for info in infos:
@@ -156,13 +162,13 @@ def _get_archive_meta(self, id: bytes) -> dict:
         )
         return metadata

-    def _infos(self):
+    def _infos(self, *, deleted=False):
         # yield the infos of all archives
-        for id in self.ids():
+        for id in self.ids(deleted=deleted):
             yield self._get_archive_meta(id)

-    def _info_tuples(self):
-        for info in self._infos():
+    def _info_tuples(self, *, deleted=False):
+        for info in self._infos(deleted=deleted):
             yield ArchiveInfo(
                 name=info["name"],
                 id=info["id"],
@@ -172,8 +178,8 @@ def _info_tuples(self):
                 host=info["hostname"],
             )

-    def _matching_info_tuples(self, match_patterns, match_end):
-        archive_infos = list(self._info_tuples())
+    def _matching_info_tuples(self, match_patterns, match_end, *, deleted=False):
+        archive_infos = list(self._info_tuples(deleted=deleted))
         if match_patterns:
             assert isinstance(match_patterns, list), f"match_pattern is a {type(match_patterns)}"
             for match in match_patterns:
@@ -216,6 +222,14 @@ def exists(self, name):
         else:
             return name in self._archives

+    def exists_id(self, id, *, deleted=False):
+        # check if an archive with this id exists
+        assert isinstance(id, bytes)
+        if not self.legacy:
+            return id in self.ids(deleted=deleted)
+        else:
+            raise NotImplementedError
+
     def exists_name_and_id(self, name, id):
         # check if an archive with this name AND id exists
         assert isinstance(name, str)
@@ -279,13 +293,14 @@ def get(self, name, raw=False):
         else:
             return dict(name=name, id=values["id"], time=values["time"])

-    def get_by_id(self, id, raw=False):
+    def get_by_id(self, id, raw=False, *, deleted=False):
         assert isinstance(id, bytes)
         if not self.legacy:
-            if id in self.ids():  # check directory
+            if id in self.ids(deleted=deleted):  # check directory
                 # looks like this archive id is in the archives directory, thus it is NOT deleted.
+                # OR we have explicitly requested a soft-deleted archive via deleted=True.
                 archive_info = self._get_archive_meta(id)
-                if archive_info["exists"]:
+                if archive_info["exists"]:  # True means we have found Archive metadata in the repo.
                     if not raw:
                         ts = parse_timestamp(archive_info["time"])
                         archive_info = ArchiveInfo(
@@ -324,10 +339,22 @@ def create(self, name, id, ts, *, overwrite=False):
         self._archives[name] = {"id": id, "time": ts}

     def delete_by_id(self, id):
-        # delete an archive
+        # soft-delete an archive
         assert isinstance(id, bytes)
         assert not self.legacy
-        self.repository.store_delete(f"archives/{bin_to_hex(id)}")
+        self.repository.store_move(f"archives/{bin_to_hex(id)}", delete=True)  # soft-delete
+
+    def undelete_by_id(self, id):
+        # undelete an archive
+        assert isinstance(id, bytes)
+        assert not self.legacy
+        self.repository.store_move(f"archives/{bin_to_hex(id)}", undelete=True)
+
+    def nuke_by_id(self, id):
+        # really delete an already soft-deleted archive
+        assert isinstance(id, bytes)
+        assert not self.legacy
+        self.repository.store_delete(f"archives/{bin_to_hex(id)}", deleted=True)

     def list(
         self,
@@ -342,6 +369,7 @@ def list(
         newer=None,
         oldest=None,
         newest=None,
+        deleted=False,
     ):
         """
         Return list of ArchiveInfo instances according to the parameters.
@@ -363,7 +391,7 @@ def list(
         if isinstance(sort_by, (str, bytes)):
             raise TypeError("sort_by must be a sequence of str")

-        archive_infos = self._matching_info_tuples(match, match_end)
+        archive_infos = self._matching_info_tuples(match, match_end, deleted=deleted)

         if any([oldest, newest, older, newer]):
             archive_infos = filter_archives_by_date(
@@ -397,12 +425,13 @@ def list_considering(self, args):
             newer=getattr(args, "newer", None),
             oldest=getattr(args, "oldest", None),
             newest=getattr(args, "newest", None),
+            deleted=getattr(args, "deleted", False),
         )

-    def get_one(self, match, *, match_end=r"\Z"):
+    def get_one(self, match, *, match_end=r"\Z", deleted=False):
         """get exactly one archive matching"""
         assert match is not None
-        archive_infos = self._matching_info_tuples(match, match_end)
+        archive_infos = self._matching_info_tuples(match, match_end, deleted=deleted)
         if len(archive_infos) != 1:
             raise CommandError(f"{match} needed to match precisely one archive, but matched {len(archive_infos)}.")
         return archive_infos[0]
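
Taken together, the manifest hunks above give archives directory entries a full soft-delete
lifecycle. A minimal sketch of how the new methods compose (not part of this diff; a loaded
Manifest instance and the binary id of an existing archive are assumed)::

    def soft_delete_roundtrip(manifest, archive_id):
        manifest.archives.delete_by_id(archive_id)  # what borg delete does (soft-delete)
        assert manifest.archives.exists_id(archive_id, deleted=True)
        assert not manifest.archives.exists_id(archive_id, deleted=False)
        manifest.archives.undelete_by_id(archive_id)  # what borg undelete does
        assert manifest.archives.exists_id(archive_id, deleted=False)
        manifest.archives.delete_by_id(archive_id)
        manifest.archives.nuke_by_id(archive_id)  # what borg compact does (hard-delete)
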
diff --git a/src/borg/remote.py b/src/borg/remote.py
index 27ec9f68b..fcf6a116f 100644
--- a/src/borg/remote.py
+++ b/src/borg/remote.py
@@ -180,6 +180,7 @@ class RepositoryServer:  # pragma: no cover
         "store_load",
         "store_store",
         "store_delete",
+        "store_move",
     )

     def __init__(self, restrict_to_paths, restrict_to_repositories, append_only, storage_quota, use_socket):
@@ -1077,8 +1078,10 @@ def get_manifest(self):
     def put_manifest(self, data):
         """actual remoting is done via self.call in the @api decorator"""

-    @api(since=parse_version("2.0.0b8"))
-    def store_list(self, name):
+    @api(
+        since=parse_version("2.0.0b8"), deleted={"since": parse_version("2.0.0b13"), "previously": False}  # TODO -> b14
+    )
+    def store_list(self, name, *, deleted=False):
         """actual remoting is done via self.call in the @api decorator"""

     @api(since=parse_version("2.0.0b8"))
@@ -1089,8 +1092,14 @@ def store_load(self, name):
     def store_store(self, name, value):
         """actual remoting is done via self.call in the @api decorator"""

-    @api(since=parse_version("2.0.0b8"))
-    def store_delete(self, name):
+    @api(
+        since=parse_version("2.0.0b8"), deleted={"since": parse_version("2.0.0b13"), "previously": False}  # TODO -> b14
+    )
+    def store_delete(self, name, *, deleted=False):
+        """actual remoting is done via self.call in the @api decorator"""
+
+    @api(since=parse_version("2.0.0b13"))  # TODO -> b14
+    def store_move(self, name, new_name=None, *, delete=False, undelete=False, deleted=False):
         """actual remoting is done via self.call in the @api decorator"""
diff --git a/src/borg/repository.py b/src/borg/repository.py
index 93c5a4f74..5f7ac27e6 100644
--- a/src/borg/repository.py
+++ b/src/borg/repository.py
@@ -519,10 +519,10 @@ def put_manifest(self, data):
         self._lock_refresh()
         return self.store.store("config/manifest", data)

-    def store_list(self, name):
+    def store_list(self, name, *, deleted=False):
         self._lock_refresh()
         try:
-            return list(self.store.list(name))
+            return list(self.store.list(name, deleted=deleted))
         except StoreObjectNotFound:
             return []

@@ -534,6 +534,10 @@ def store_store(self, name, value):
         self._lock_refresh()
         return self.store.store(name, value)

-    def store_delete(self, name):
+    def store_delete(self, name, *, deleted=False):
         self._lock_refresh()
-        return self.store.delete(name)
+        return self.store.delete(name, deleted=deleted)
+
+    def store_move(self, name, new_name=None, *, delete=False, undelete=False, deleted=False):
+        self._lock_refresh()
+        return self.store.move(name, new_name, delete=delete, undelete=undelete, deleted=deleted)
diff --git a/src/borg/testsuite/archiver/check_cmd_test.py b/src/borg/testsuite/archiver/check_cmd_test.py
index 0eb19e7f1..cc2ee31e5 100644
--- a/src/borg/testsuite/archiver/check_cmd_test.py
+++ b/src/borg/testsuite/archiver/check_cmd_test.py
@@ -1,4 +1,5 @@
 from datetime import datetime, timezone, timedelta
+from pathlib import Path
 import shutil
 from unittest.mock import patch

@@ -270,18 +271,21 @@ def test_manifest_rebuild_corrupted_chunk(archivers, request):
 def test_check_undelete_archives(archivers, request):
     archiver = request.getfixturevalue(archivers)
     check_cmd_setup(archiver)  # creates archive1 and archive2
-    # borg delete does it rather quick and dirty: it only kills the archives directory entry
-    cmd(archiver, "delete", "archive1")
-    cmd(archiver, "delete", "archive2")
-    output = cmd(archiver, "repo-list")
-    assert "archive1" not in output
-    assert "archive2" not in output
-    # borg check will re-discover archive1 and archive2 and new archives directory entries
-    # will be created because we requested undeleting archives.
-    cmd(archiver, "check", "--repair", "--undelete-archives", exit_code=0)
+    existing_archive_ids = set(cmd(archiver, "repo-list", "--short").splitlines())
+    create_src_archive(archiver, "archive3")
+    archive_ids = set(cmd(archiver, "repo-list", "--short").splitlines())
+    new_archive_id_hex = (archive_ids - existing_archive_ids).pop()
+    (Path(archiver.repository_path) / "archives" / new_archive_id_hex).unlink()  # lose the entry for archive3
     output = cmd(archiver, "repo-list")
     assert "archive1" in output
     assert "archive2" in output
+    assert "archive3" not in output
+    # borg check will re-discover archive3 and create a new archives directory entry.
+    cmd(archiver, "check", "--repair", "--find-lost-archives", exit_code=0)
+    output = cmd(archiver, "repo-list")
+    assert "archive1" in output
+    assert "archive2" in output
+    assert "archive3" in output


 def test_spoofed_archive(archivers, request):
diff --git a/src/borg/testsuite/archiver/repo_list_cmd_test.py b/src/borg/testsuite/archiver/repo_list_cmd_test.py
index ffce5af00..66cc9bf63 100644
--- a/src/borg/testsuite/archiver/repo_list_cmd_test.py
+++ b/src/borg/testsuite/archiver/repo_list_cmd_test.py
@@ -98,3 +98,23 @@ def test_repo_list_json(archivers, request):
     assert "keyfile" not in list_repo["encryption"]
     archive0 = list_repo["archives"][0]
     checkts(archive0["time"])
+
+
+def test_repo_list_deleted(archivers, request):
+    archiver = request.getfixturevalue(archivers)
+    cmd(archiver, "repo-create", RK_ENCRYPTION)
+    cmd(archiver, "create", "normal1", src_dir)
+    cmd(archiver, "create", "deleted1", src_dir)
+    cmd(archiver, "create", "normal2", src_dir)
+    cmd(archiver, "create", "deleted2", src_dir)
+    cmd(archiver, "delete", "-a", "sh:deleted*")
+    output = cmd(archiver, "repo-list")
+    assert "normal1" in output
+    assert "normal2" in output
+    assert "deleted1" not in output
+    assert "deleted2" not in output
+    output = cmd(archiver, "repo-list", "--deleted")
+    assert "normal1" not in output
+    assert "normal2" not in output
+    assert "deleted1" in output
+    assert "deleted2" in output
diff --git a/src/borg/testsuite/archiver/undelete_cmd_test.py b/src/borg/testsuite/archiver/undelete_cmd_test.py
new file mode 100644
index 000000000..8cbc18a93
--- /dev/null
+++ b/src/borg/testsuite/archiver/undelete_cmd_test.py
@@ -0,0 +1,67 @@
+from ...constants import *  # NOQA
+from . import cmd, create_regular_file, generate_archiver_tests, RK_ENCRYPTION
+
+pytest_generate_tests = lambda metafunc: generate_archiver_tests(metafunc, kinds="local,remote,binary")  # NOQA
+
+
+def test_undelete_single(archivers, request):
+    archiver = request.getfixturevalue(archivers)
+    create_regular_file(archiver.input_path, "file1", size=1024 * 80)
+    cmd(archiver, "repo-create", RK_ENCRYPTION)
+    cmd(archiver, "create", "normal", "input")
+    cmd(archiver, "create", "deleted", "input")
+    cmd(archiver, "delete", "deleted")
+    output = cmd(archiver, "repo-list")
+    assert "normal" in output
+    assert "deleted" not in output
+    cmd(archiver, "undelete", "deleted")
+    output = cmd(archiver, "repo-list")
+    assert "normal" in output
+    assert "deleted" in output  # it's back!
+    cmd(archiver, "check")
+
+
+def test_undelete_multiple_dryrun(archivers, request):
+    archiver = request.getfixturevalue(archivers)
+    create_regular_file(archiver.input_path, "file1", size=1024 * 80)
+    cmd(archiver, "repo-create", RK_ENCRYPTION)
+    cmd(archiver, "create", "normal", "input")
+    cmd(archiver, "create", "deleted1", "input")
+    cmd(archiver, "create", "deleted2", "input")
+    cmd(archiver, "delete", "deleted1")
+    cmd(archiver, "delete", "deleted2")
+    output = cmd(archiver, "repo-list")
+    assert "normal" in output
+    assert "deleted1" not in output
+    assert "deleted2" not in output
+    output = cmd(archiver, "undelete", "--dry-run", "--list", "-a", "sh:*")
+    assert "normal" not in output  # not a candidate for undeletion
+    assert "deleted1" in output  # candidate for undeletion
+    assert "deleted2" in output  # candidate for undeletion
+    output = cmd(archiver, "repo-list")  # nothing changed, it was a dry-run
+    assert "normal" in output
+    assert "deleted1" not in output
+    assert "deleted2" not in output
+
+
+def test_undelete_multiple_run(archivers, request):
+    archiver = request.getfixturevalue(archivers)
+    create_regular_file(archiver.input_path, "file1", size=1024 * 80)
+    cmd(archiver, "repo-create", RK_ENCRYPTION)
+    cmd(archiver, "create", "normal", "input")
+    cmd(archiver, "create", "deleted1", "input")
+    cmd(archiver, "create", "deleted2", "input")
+    cmd(archiver, "delete", "deleted1")
+    cmd(archiver, "delete", "deleted2")
+    output = cmd(archiver, "repo-list")
+    assert "normal" in output
+    assert "deleted1" not in output
+    assert "deleted2" not in output
+    output = cmd(archiver, "undelete", "--list", "-a", "sh:*")
+    assert "normal" not in output  # not undeleted
+    assert "deleted1" in output  # undeleted
+    assert "deleted2" in output  # undeleted
+    output = cmd(archiver, "repo-list")  # the undeleted archives are back
+    assert "normal" in output
+    assert "deleted1" in output
+    assert "deleted2" in output
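
One implementation detail worth calling out: ids(deleted=True) in manifest.py has to work
around borgstore's current store_list(deleted=True) behavior, which yields deleted and
not-deleted items alike. A standalone sketch of that client-side filtering (not part of this
diff; a store_list callable with the signature used above is assumed)::

    def soft_deleted_infos(store_list):
        # store_list(deleted=True) currently yields deleted AND not-deleted items,
        # so subtract the not-deleted ones to keep only the soft-deleted archives.
        everything = set(store_list("archives", deleted=True))
        alive = set(store_list("archives", deleted=False))
        return everything - alive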