
NAME is the series, archive id is the hash

aid:<archive-id-prefix> can be used for -a / --match-archives
to match on the archive id (prefix) instead of the name.

The NAME positional argument now also supports matching (including aid:),
but requires that there is exactly ONE result.
Thomas Waldmann 2024-09-13 01:02:05 +02:00
parent ed31131fb6
commit 8237e6beca
12 changed files with 151 additions and 97 deletions
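
For orientation, a minimal sketch of the new matching semantics, expressed against the Archives API changed below (manifest is assumed to be a loaded Manifest; the id prefix is a placeholder):

    # -a / --match-archives can select any number of archives by name pattern,
    # or exactly one archive by its id prefix via "aid:...":
    infos = manifest.archives.list_considering(args)
    # a positional NAME also supports matching (including "aid:..."), but must
    # resolve to exactly ONE archive, otherwise get_one() raises CommandError:
    info = manifest.archives.get_one("aid:8237e6")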

src/borg/archive.py

@@ -442,6 +442,7 @@ class Archive:
self,
manifest,
name,
*,
cache=None,
create=False,
numeric_ids=False,
@@ -458,6 +459,7 @@
log_json=False,
iec=False,
):
name_is_id = isinstance(name, bytes)
self.cwd = os.getcwd()
assert isinstance(manifest, Manifest)
self.manifest = manifest
@@ -493,10 +495,12 @@
self.create = create
if self.create:
self.items_buffer = CacheChunkBuffer(self.cache, self.key, self.stats)
if manifest.archives.exists(name):
raise self.AlreadyExists(name)
else:
info = self.manifest.archives.get(name)
if name_is_id:
# we also go over the manifest here to avoid quick&dirty deleted archives
info = self.manifest.archives.get_by_id(name)
else:
info = self.manifest.archives.get(name)
if info is None:
raise self.DoesNotExist(name)
self.load(info.id)
@@ -611,8 +615,6 @@ Duration: {0.duration}
def save(self, name=None, comment=None, timestamp=None, stats=None, additional_metadata=None):
name = name or self.name
if self.manifest.archives.exists(name):
raise self.AlreadyExists(name)
self.items_buffer.flush(flush=True)
item_ptrs = archive_put_items(
self.items_buffer.chunks, repo_objs=self.repo_objs, cache=self.cache, stats=self.stats
@@ -956,18 +958,16 @@ Duration: {0.duration}
self.id = new_id
def rename(self, name):
if self.manifest.archives.exists(name):
raise self.AlreadyExists(name)
oldname = self.name
old_id = self.id
self.name = name
self.set_meta("name", name)
self.manifest.archives.delete(oldname)
self.manifest.archives.delete_by_id(old_id)
def delete(self):
# quick and dirty: we just nuke the archive from the archives list - that will
# potentially orphan all chunks previously referenced by the archive, except the ones also
# referenced by other archives. In the end, "borg compact" will clean up and free space.
self.manifest.archives.delete(self.name)
self.manifest.archives.delete_by_id(self.id)
@staticmethod
def compare_archives_iter(
@@ -2090,7 +2090,9 @@ class ArchiveChecker:
logger.debug(f"archive id new: {bin_to_hex(new_archive_id)}")
cdata = self.repo_objs.format(new_archive_id, {}, data, ro_type=ROBJ_ARCHIVE_META)
add_reference(new_archive_id, len(data), cdata)
self.manifest.archives.create(info.name, new_archive_id, info.ts, overwrite=True)
self.manifest.archives.create(info.name, new_archive_id, info.ts)
if archive_id != new_archive_id:
self.manifest.archives.delete_by_id(archive_id)
pi.finish()
def finish(self):
@@ -2148,18 +2150,16 @@ class ArchiveRecreater:
self.progress = progress
self.print_file_status = file_status_printer or (lambda *args: None)
def recreate(self, archive_name, comment=None, target_name=None):
assert not self.is_temporary_archive(archive_name)
archive = self.open_archive(archive_name)
def recreate(self, archive_id, target_name, delete_original, comment=None):
archive = self.open_archive(archive_id)
target = self.create_target(archive, target_name)
if self.exclude_if_present or self.exclude_caches:
self.matcher_add_tagged_dirs(archive)
if self.matcher.empty() and not target.recreate_rechunkify and comment is None and target_name is None:
if self.matcher.empty() and not target.recreate_rechunkify and comment is None:
# nothing to do
return False
self.process_items(archive, target)
replace_original = target_name is None
self.save(archive, target, comment, replace_original=replace_original)
self.save(archive, target, comment, delete_original=delete_original)
return True
def process_items(self, archive, target):
@@ -2216,7 +2216,7 @@ class ArchiveRecreater:
for chunk in chunk_iterator:
yield Chunk(chunk, size=len(chunk), allocation=CH_DATA)
def save(self, archive, target, comment=None, replace_original=True):
def save(self, archive, target, comment=None, delete_original=True):
if self.dry_run:
return
if comment is None:
@@ -2242,9 +2242,8 @@
}
target.save(comment=comment, timestamp=self.timestamp, additional_metadata=additional_metadata)
if replace_original:
if delete_original:
archive.delete()
target.rename(archive.name)
if self.stats:
target.start = _start
target.end = archive_ts_now()
@@ -2277,9 +2276,8 @@
matcher.add(tag_files, IECommand.Include)
matcher.add(tagged_dirs, IECommand.ExcludeNoRecurse)
def create_target(self, archive, target_name=None):
def create_target(self, archive, target_name):
"""Create target archive."""
target_name = target_name or archive.name + ".recreate"
target = self.create_target_archive(target_name)
# If the archives use the same chunker params, then don't rechunkify
source_chunker_params = tuple(archive.metadata.get("chunker_params", []))
@@ -2308,5 +2306,5 @@
)
return target
def open_archive(self, name, **kwargs):
return Archive(self.manifest, name, cache=self.cache, **kwargs)
def open_archive(self, archive_id, **kwargs):
return Archive(self.manifest, archive_id, cache=self.cache, **kwargs)
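
A sketch of how the reworked Archive constructor is used (names as in the hunks above; manifest is assumed to be a loaded Manifest and the id prefix is a placeholder):

    info = manifest.archives.get_one("aid:8237e6")  # resolve a match to exactly one archive
    archive = Archive(manifest, info.id)            # bytes -> looked up via get_by_id()
    archive = Archive(manifest, "my-archive")       # str   -> looked up by name via get()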

src/borg/archiver/_common.py

@@ -257,9 +257,10 @@ def with_archive(method):
def wrapper(self, args, repository, manifest, **kwargs):
archive_name = getattr(args, "name", None)
assert archive_name is not None
archive_info = manifest.archives.get_one(archive_name)
archive = Archive(
manifest,
archive_name,
archive_info.id,
numeric_ids=getattr(args, "numeric_ids", False),
noflags=getattr(args, "noflags", False),
noacls=getattr(args, "noacls", False),

src/borg/archiver/debug_cmd.py

@@ -32,7 +32,8 @@ class DebugMixIn:
def do_debug_dump_archive_items(self, args, repository, manifest):
"""dump (decrypted, decompressed) archive items metadata (not: data)"""
repo_objs = manifest.repo_objs
archive = Archive(manifest, args.name)
archive_info = manifest.archives.get_one(args.name)
archive = Archive(manifest, archive_info.id)
for i, item_id in enumerate(archive.metadata.items):
_, data = repo_objs.parse(item_id, repository.get(item_id), ro_type=ROBJ_ARCHIVE_STREAM)
filename = "%06d_%s.items" % (i, bin_to_hex(item_id))
@@ -44,9 +45,10 @@ class DebugMixIn:
@with_repository(compatibility=Manifest.NO_OPERATION_CHECK)
def do_debug_dump_archive(self, args, repository, manifest):
"""dump decoded archive metadata (not: data)"""
archive_info = manifest.archives.get_one(args.name)
repo_objs = manifest.repo_objs
try:
archive_meta_orig = manifest.archives.get(args.name, raw=True)
archive_meta_orig = manifest.archives.get_by_id(archive_info.id, raw=True)
except KeyError:
raise Archive.DoesNotExist(args.name)

src/borg/archiver/delete_cmd.py

@@ -3,7 +3,7 @@ import logging
from ._common import with_repository
from ..constants import * # NOQA
from ..helpers import format_archive, CommandError
from ..helpers import format_archive, CommandError, bin_to_hex
from ..manifest import Manifest
from ..logger import create_logger
@@ -18,8 +18,9 @@ class DeleteMixIn:
self.output_list = args.output_list
dry_run = args.dry_run
manifest = Manifest.load(repository, (Manifest.Operation.DELETE,))
archive_names = tuple(x.name for x in manifest.archives.list_considering(args))
if not archive_names:
archive_infos = manifest.archives.list_considering(args)
count = len(archive_infos)
if count == 0:
return
if args.match_archives is None and args.first == 0 and args.last == 0:
raise CommandError(
@@ -29,18 +30,20 @@
deleted = False
logger_list = logging.getLogger("borg.output.list")
for i, archive_name in enumerate(archive_names, 1):
for i, archive_info in enumerate(archive_infos, 1):
name, id, hex_id = archive_info.name, archive_info.id, bin_to_hex(archive_info.id)
try:
# this does NOT use Archive.delete, so this code hopefully even works in cases a corrupt archive
# would make the code in class Archive crash, so the user can at least get rid of such archives.
current_archive = manifest.archives.delete(archive_name)
if not dry_run:
manifest.archives.delete_by_id(id)
except KeyError:
self.print_warning(f"Archive {archive_name} not found ({i}/{len(archive_names)}).")
self.print_warning(f"Archive {name} {hex_id} not found ({i}/{count}).")
else:
deleted = True
if self.output_list:
msg = "Would delete: {} ({}/{})" if dry_run else "Deleted archive: {} ({}/{})"
logger_list.info(msg.format(format_archive(current_archive), i, len(archive_names)))
logger_list.info(msg.format(format_archive(archive_info), i, count))
if dry_run:
logger.info("Finished dry-run.")
elif deleted:

src/borg/archiver/info_cmd.py

@@ -18,12 +18,12 @@ class InfoMixIn:
def do_info(self, args, repository, manifest, cache):
"""Show archive details such as disk space used"""
archive_names = tuple(x.name for x in manifest.archives.list_considering(args))
archive_infos = manifest.archives.list_considering(args)
output_data = []
for i, archive_name in enumerate(archive_names, 1):
archive = Archive(manifest, archive_name, cache=cache, iec=args.iec)
for i, archive_info in enumerate(archive_infos, 1):
archive = Archive(manifest, archive_info.id, cache=cache, iec=args.iec)
info = archive.info()
if args.json:
output_data.append(info)
@@ -48,7 +48,7 @@
.strip()
.format(**info)
)
if not args.json and len(archive_names) - i:
if not args.json and len(archive_infos) - i:
print()
if args.json:

src/borg/archiver/list_cmd.py

@@ -27,8 +27,10 @@ class ListMixIn:
else:
format = os.environ.get("BORG_LIST_FORMAT", "{mode} {user:6} {group:6} {size:8} {mtime} {path}{extra}{NL}")
archive_info = manifest.archives.get_one(args.name)
def _list_inner(cache):
archive = Archive(manifest, args.name, cache=cache)
archive = Archive(manifest, archive_info.id, cache=cache)
formatter = ItemFormatter(archive, format)
for item in archive.iter_items(lambda item: matcher.match(item.path)):
sys.stdout.write(formatter.format_item(item, args.json_lines, sort=True))

src/borg/archiver/prune_cmd.py

@@ -125,7 +125,7 @@ class PruneMixIn:
else:
archives_deleted += 1
log_message = "Pruning archive (%d/%d):" % (archives_deleted, to_delete_len)
archive = Archive(manifest, archive.name, cache)
archive = Archive(manifest, archive.id, cache=cache)
archive.delete()
uncommitted_deletes += 1
else:

src/borg/archiver/recreate_cmd.py

@@ -5,7 +5,7 @@ from ._common import build_matcher
from ..archive import ArchiveRecreater
from ..constants import * # NOQA
from ..compress import CompressionSpec
from ..helpers import archivename_validator, comment_validator, PathSpec, ChunkerParams, CommandError
from ..helpers import archivename_validator, comment_validator, PathSpec, ChunkerParams, bin_to_hex
from ..helpers import timestamp
from ..manifest import Manifest
@@ -38,15 +38,19 @@ class RecreateMixIn:
timestamp=args.timestamp,
)
archive_names = tuple(archive.name for archive in manifest.archives.list_considering(args))
if args.target is not None and len(archive_names) != 1:
raise CommandError("--target: Need to specify single archive")
for name in archive_names:
if recreater.is_temporary_archive(name):
for archive_info in manifest.archives.list_considering(args):
if recreater.is_temporary_archive(archive_info.name):
continue
print("Processing", name)
if not recreater.recreate(name, args.comment, args.target):
logger.info("Skipped archive %s: Nothing to do. Archive was not processed.", name)
name, hex_id = archive_info.name, bin_to_hex(archive_info.id)
print(f"Processing {name} {hex_id}")
if args.target:
target = args.target
delete_original = False
else:
target = archive_info.name
delete_original = True
if not recreater.recreate(archive_info.id, target, delete_original, args.comment):
logger.info(f"Skipped archive {name} {hex_id}: Nothing to do.")
if not args.dry_run:
manifest.write()
@@ -135,8 +139,7 @@
default=None,
type=archivename_validator,
action=Highlander,
help="create a new archive with the name ARCHIVE, do not replace existing archive "
"(only applies for a single archive)",
help="create a new archive with the name ARCHIVE, do not replace existing archive",
)
archive_group.add_argument(
"--comment",

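Since everything is keyed on the archive id now, recreate no longer needs the temporary ".recreate" name or the final rename. A sketch of the two call shapes (assuming a configured ArchiveRecreater instance named recreater):

    # default: save the recreated archive under the original name, then delete the original by id
    recreater.recreate(archive_info.id, archive_info.name, delete_original=True, comment=None)
    # with --target NAME: keep the original and create the new archive alongside it
    recreater.recreate(archive_info.id, args.target, delete_original=False, comment=None)
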
src/borg/archiver/transfer_cmd.py

@@ -33,14 +33,15 @@ class TransferMixIn:
)
dry_run = args.dry_run
archive_names = tuple(x.name for x in other_manifest.archives.list_considering(args))
if not archive_names:
archive_infos = other_manifest.archives.list_considering(args)
count = len(archive_infos)
if count == 0:
return
an_errors = []
for archive_name in archive_names:
for archive_info in archive_infos:
try:
archivename_validator(archive_name)
archivename_validator(archive_info.name)
except argparse.ArgumentTypeError as err:
an_errors.append(str(err))
if an_errors:
@@ -48,12 +49,12 @@
raise Error("\n".join(an_errors))
ac_errors = []
for archive_name in archive_names:
archive = Archive(other_manifest, archive_name)
for archive_info in archive_infos:
archive = Archive(other_manifest, archive_info.id)
try:
comment_validator(archive.metadata.get("comment", ""))
except argparse.ArgumentTypeError as err:
ac_errors.append(f"{archive_name}: {err}")
ac_errors.append(f"{archive_info.name}: {err}")
if ac_errors:
ac_errors.insert(0, "Invalid archive comments detected, please fix them before transfer:")
raise Error("\n".join(ac_errors))
@@ -75,7 +76,8 @@
upgrader = UpgraderCls(cache=cache)
for name in archive_names:
for archive_info in archive_infos:
name = archive_info.name
transfer_size = 0
present_size = 0
if manifest.archives.exists(name) and not dry_run:

src/borg/manifest.py

@@ -15,7 +15,7 @@ from .constants import * # NOQA
from .helpers.datastruct import StableDict
from .helpers.parseformat import bin_to_hex, hex_to_bin
from .helpers.time import parse_timestamp, calculate_relative_offset, archive_ts_now
from .helpers.errors import Error
from .helpers.errors import Error, CommandError
from .patterns import get_regex_from_pattern
from .repoobj import RepoObj
@@ -152,6 +152,10 @@ class Archives:
archive_info = dict(name=name, id=self._archives[name]["id"], time=self._archives[name]["time"])
yield None, archive_info
def _info_tuples(self):
for _, info in self._infos():
yield ArchiveInfo(name=info["name"], id=info["id"], ts=parse_timestamp(info["time"]))
def _lookup_name(self, name, raw=False):
assert isinstance(name, str)
assert not self.legacy
@@ -159,12 +163,25 @@
if archive_info["name"] == name:
if not raw:
ts = parse_timestamp(archive_info["time"])
return store_key, ArchiveInfo(name=name, id=archive_info["id"], ts=ts)
return store_key, ArchiveInfo(name=archive_info["name"], id=archive_info["id"], ts=ts)
else:
return store_key, archive_info
else:
raise KeyError(name)
def _lookup_id(self, id, raw=False):
assert isinstance(id, bytes)
assert not self.legacy
for store_key, archive_info in self._infos():
if archive_info["id"] == id:
if not raw:
ts = parse_timestamp(archive_info["time"])
return store_key, ArchiveInfo(name=archive_info["name"], id=archive_info["id"], ts=ts)
else:
return store_key, archive_info
else:
raise KeyError(bin_to_hex(id))
def names(self):
# yield the names of all archives
if not self.legacy:
@@ -191,6 +208,26 @@
else:
return dict(name=name, id=values["id"], time=values["time"])
def get_by_id(self, id, raw=False):
assert isinstance(id, bytes)
if not self.legacy:
try:
store_key, archive_info = self._lookup_id(id, raw=raw)
return archive_info
except KeyError:
return None
else:
for name, values in self._archives.items():
if id == values["id"]:
break
else:
return None
if not raw:
ts = parse_timestamp(values["time"])
return ArchiveInfo(name=name, id=values["id"], ts=ts)
else:
return dict(name=name, id=values["id"], time=values["time"])
def create(self, name, id, ts, *, overwrite=False):
assert isinstance(name, str)
assert isinstance(id, bytes)
@@ -198,16 +235,6 @@
ts = ts.isoformat(timespec="microseconds")
assert isinstance(ts, str)
if not self.legacy:
try:
store_key, _ = self._lookup_name(name)
except KeyError:
pass
else:
# looks like we already have an archive list entry with that name
if not overwrite:
raise KeyError("archive already exists")
else:
self.repository.store_delete(f"archives/{store_key}")
archive = dict(name=name, id=id, time=ts)
value = self.manifest.key.pack_metadata(archive)
id = self.manifest.repo_objs.id_hash(value)
@@ -228,6 +255,13 @@
else:
self._archives.pop(name)
def delete_by_id(self, id):
# delete an archive
assert isinstance(id, bytes)
assert not self.legacy
store_key, archive_info = self._lookup_id(id)
self.repository.store_delete(f"archives/{store_key}")
def list(
self,
*,
@@ -262,22 +296,32 @@
if isinstance(sort_by, (str, bytes)):
raise TypeError("sort_by must be a sequence of str")
archives = [self.get(name) for name in self.names()]
regex = get_regex_from_pattern(match or "re:.*")
regex = re.compile(regex + match_end)
archives = [x for x in archives if regex.match(x.name) is not None]
archive_infos = self._info_tuples()
if match is None:
archive_infos = list(archive_infos)
elif match.startswith("aid:"): # do a match on the archive ID (prefix)
wanted_id = match.removeprefix("aid:")
archive_infos = [x for x in archive_infos if bin_to_hex(x.id).startswith(wanted_id)]
if len(archive_infos) != 1:
raise CommandError("archive ID based match needs to match precisely one archive ID")
else: # do a match on the name
regex = get_regex_from_pattern(match)
regex = re.compile(regex + match_end)
archive_infos = [x for x in archive_infos if regex.match(x.name) is not None]
if any([oldest, newest, older, newer]):
archives = filter_archives_by_date(archives, oldest=oldest, newest=newest, newer=newer, older=older)
archive_infos = filter_archives_by_date(
archive_infos, oldest=oldest, newest=newest, newer=newer, older=older
)
for sortkey in reversed(sort_by):
archives.sort(key=attrgetter(sortkey))
archive_infos.sort(key=attrgetter(sortkey))
if first:
archives = archives[:first]
archive_infos = archive_infos[:first]
elif last:
archives = archives[max(len(archives) - last, 0) :]
archive_infos = archive_infos[max(len(archive_infos) - last, 0) :]
if reverse:
archives.reverse()
return archives
archive_infos.reverse()
return archive_infos
def list_considering(self, args):
"""
@@ -299,6 +343,21 @@
newest=getattr(args, "newest", None),
)
def get_one(self, match, *, match_end=r"\Z"):
"""get exactly one archive matching <match>"""
assert match is not None
archive_infos = self._info_tuples()
if match.startswith("aid:"): # do a match on the archive ID (prefix)
wanted_id = match.removeprefix("aid:")
archive_infos = [i for i in archive_infos if bin_to_hex(i.id).startswith(wanted_id)]
else: # do a match on the name
regex = get_regex_from_pattern(match)
regex = re.compile(regex + match_end)
archive_infos = [i for i in archive_infos if regex.match(i.name) is not None]
if len(archive_infos) != 1:
raise CommandError(f"{match} needed to match precisely one archive, but matched {len(archive_infos)}.")
return archive_infos[0]
def _set_raw_dict(self, d):
"""set the dict we get from the msgpack unpacker"""
for k, v in d.items():
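
Taken together, the Archives class now offers id-keyed access next to the name-keyed methods. A sketch of the new surface (manifest is assumed to be a loaded Manifest; archive_id is assumed to be the raw bytes id of an archive, and the name pattern and id prefix are placeholders):

    info = manifest.archives.get_by_id(archive_id)        # ArchiveInfo, or None if unknown
    manifest.archives.delete_by_id(archive_id)            # raises KeyError if unknown
    infos = manifest.archives.list(match="aid:8237e6")    # id prefix: must match exactly one
    infos = manifest.archives.list(match="sh:home-*")     # name match: any number of results
    info = manifest.archives.get_one("sh:home-*")         # must resolve to exactly one archive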

src/borg/testsuite/archiver/recreate_cmd.py

@@ -5,7 +5,6 @@ from datetime import datetime
import pytest
from ...constants import * # NOQA
from ...helpers import CommandError
from .. import changedir, are_hardlinks_supported
from . import (
_create_test_caches,
@@ -79,18 +78,6 @@ def test_recreate_hardlinked_tags(archivers, request): # test for issue #4911
# if issue #4911 is present, the recreate will crash with a KeyError for "input/file1"
def test_recreate_target_rc(archivers, request):
archiver = request.getfixturevalue(archivers)
cmd(archiver, "repo-create", RK_ENCRYPTION)
if archiver.FORK_DEFAULT:
expected_ec = CommandError().exit_code
output = cmd(archiver, "recreate", "--target=asdf", exit_code=expected_ec)
assert "Need to specify single archive" in output
else:
with pytest.raises(CommandError):
cmd(archiver, "recreate", "--target=asdf")
def test_recreate_target(archivers, request):
archiver = request.getfixturevalue(archivers)
create_test_files(archiver.input_path)

src/borg/testsuite/archiver/return_codes.py

@@ -1,4 +1,3 @@
from ...archive import Archive
from ...constants import * # NOQA
from ...helpers import IncludePatternNeverMatchedWarning
from . import cmd_fixture, changedir # NOQA
@@ -18,5 +17,3 @@ def test_return_codes(cmd_fixture, tmpdir):
assert rc == EXIT_SUCCESS
rc, out = cmd_fixture("--repo=%s" % repo, "extract", "archive", "does/not/match")
assert rc == IncludePatternNeverMatchedWarning().exit_code
rc, out = cmd_fixture("--repo=%s" % repo, "create", "archive", str(input))
assert rc == Archive.AlreadyExists().exit_code