recreate: remove --recompress option

For many use cases, the repo-wide "borg rcompress" command is more efficient.

Also, recreate --recompress calls add_chunk with overwrite=True,
which is unsupported with the AdHocCache.
This commit is contained in:
Thomas Waldmann 2023-09-23 00:01:39 +02:00
parent a9e625d11f
commit 15c24cbe7e
No known key found for this signature in database
GPG Key ID: 243ACFA951F78E01
3 changed files with 6 additions and 70 deletions

View File

@ -23,7 +23,7 @@ from . import xattr
from .chunker import get_chunker, Chunk from .chunker import get_chunker, Chunk
from .cache import ChunkListEntry from .cache import ChunkListEntry
from .crypto.key import key_factory, UnsupportedPayloadError from .crypto.key import key_factory, UnsupportedPayloadError
from .compress import Compressor, CompressionSpec from .compress import CompressionSpec
from .constants import * # NOQA from .constants import * # NOQA
from .crypto.low_level import IntegrityError as IntegrityErrorBase from .crypto.low_level import IntegrityError as IntegrityErrorBase
from .hashindex import ChunkIndex, ChunkIndexEntry, CacheSynchronizer from .hashindex import ChunkIndex, ChunkIndexEntry, CacheSynchronizer
@ -2349,8 +2349,6 @@ class ArchiveRecreater:
keep_exclude_tags=False, keep_exclude_tags=False,
chunker_params=None, chunker_params=None,
compression=None, compression=None,
recompress=False,
always_recompress=False,
dry_run=False, dry_run=False,
stats=False, stats=False,
progress=False, progress=False,
@ -2374,8 +2372,6 @@ class ArchiveRecreater:
if self.rechunkify: if self.rechunkify:
logger.debug("Rechunking archives to %s", chunker_params) logger.debug("Rechunking archives to %s", chunker_params)
self.chunker_params = chunker_params or CHUNKER_PARAMS self.chunker_params = chunker_params or CHUNKER_PARAMS
self.recompress = recompress
self.always_recompress = always_recompress
self.compression = compression or CompressionSpec("none") self.compression = compression or CompressionSpec("none")
self.seen_chunks = set() self.seen_chunks = set()
@ -2393,13 +2389,7 @@ class ArchiveRecreater:
target = self.create_target(archive, target_name) target = self.create_target(archive, target_name)
if self.exclude_if_present or self.exclude_caches: if self.exclude_if_present or self.exclude_caches:
self.matcher_add_tagged_dirs(archive) self.matcher_add_tagged_dirs(archive)
if ( if self.matcher.empty() and not target.recreate_rechunkify and comment is None and target_name is None:
self.matcher.empty()
and not self.recompress
and not target.recreate_rechunkify
and comment is None
and target_name is None
):
# nothing to do # nothing to do
return False return False
self.process_items(archive, target) self.process_items(archive, target)
@ -2432,7 +2422,7 @@ class ArchiveRecreater:
self.print_file_status(status, item.path) self.print_file_status(status, item.path)
def process_chunks(self, archive, target, item): def process_chunks(self, archive, target, item):
if not self.recompress and not target.recreate_rechunkify: if not target.recreate_rechunkify:
for chunk_id, size in item.chunks: for chunk_id, size in item.chunks:
self.cache.chunk_incref(chunk_id, target.stats) self.cache.chunk_incref(chunk_id, target.stats)
return item.chunks return item.chunks
@ -2444,19 +2434,7 @@ class ArchiveRecreater:
chunk_id, data = cached_hash(chunk, self.key.id_hash) chunk_id, data = cached_hash(chunk, self.key.id_hash)
if chunk_id in self.seen_chunks: if chunk_id in self.seen_chunks:
return self.cache.chunk_incref(chunk_id, target.stats) return self.cache.chunk_incref(chunk_id, target.stats)
overwrite = self.recompress chunk_entry = self.cache.add_chunk(chunk_id, {}, data, stats=target.stats, wait=False)
if self.recompress and not self.always_recompress and chunk_id in self.cache.chunks:
# Check if this chunk is already compressed the way we want it
old_meta = self.repo_objs.parse_meta(chunk_id, self.repository.get(chunk_id, read_data=False))
compr_hdr = bytes((old_meta["ctype"], old_meta["clevel"]))
compressor_cls, level = Compressor.detect(compr_hdr)
if (
compressor_cls.name == self.repo_objs.compressor.decide({}, data).name
and level == self.repo_objs.compressor.level
):
# Stored chunk has the same compression method and level as we wanted
overwrite = False
chunk_entry = self.cache.add_chunk(chunk_id, {}, data, stats=target.stats, overwrite=overwrite, wait=False)
self.cache.repository.async_response(wait=False) self.cache.repository.async_response(wait=False)
self.seen_chunks.add(chunk_entry.id) self.seen_chunks.add(chunk_entry.id)
return chunk_entry return chunk_entry

View File

@ -21,8 +21,6 @@ class RecreateMixIn:
matcher = build_matcher(args.patterns, args.paths) matcher = build_matcher(args.patterns, args.paths)
self.output_list = args.output_list self.output_list = args.output_list
self.output_filter = args.output_filter self.output_filter = args.output_filter
recompress = args.recompress != "never"
always_recompress = args.recompress == "always"
recreater = ArchiveRecreater( recreater = ArchiveRecreater(
manifest, manifest,
@ -33,8 +31,6 @@ class RecreateMixIn:
keep_exclude_tags=args.keep_exclude_tags, keep_exclude_tags=args.keep_exclude_tags,
chunker_params=args.chunker_params, chunker_params=args.chunker_params,
compression=args.compression, compression=args.compression,
recompress=recompress,
always_recompress=always_recompress,
progress=args.progress, progress=args.progress,
stats=args.stats, stats=args.stats,
file_status_printer=self.print_file_status, file_status_printer=self.print_file_status,
@ -81,11 +77,6 @@ class RecreateMixIn:
Note that all paths in an archive are relative, therefore absolute patterns/paths Note that all paths in an archive are relative, therefore absolute patterns/paths
will *not* match (``--exclude``, ``--exclude-from``, PATHs). will *not* match (``--exclude``, ``--exclude-from``, PATHs).
``--recompress`` allows one to change the compression of existing data in archives.
Due to how Borg stores compressed size information this might display
incorrect information for archives that were not recreated at the same time.
There is no risk of data loss by this.
``--chunker-params`` will re-chunk all files in the archive, this can be ``--chunker-params`` will re-chunk all files in the archive, this can be
used to have upgraded Borg 0.xx archives deduplicate with Borg 1.x archives. used to have upgraded Borg 0.xx archives deduplicate with Borg 1.x archives.
@ -101,9 +92,9 @@ class RecreateMixIn:
With ``--target`` the original archive is not replaced, instead a new archive is created. With ``--target`` the original archive is not replaced, instead a new archive is created.
When rechunking (or recompressing), space usage can be substantial - expect When rechunking, space usage can be substantial - expect
at least the entire deduplicated size of the archives using the previous at least the entire deduplicated size of the archives using the previous
chunker (or compression) params. chunker params.
If you recently ran borg check --repair and it had to fix lost chunks with all-zero If you recently ran borg check --repair and it had to fix lost chunks with all-zero
replacement chunks, please first run another backup for the same data and re-run replacement chunks, please first run another backup for the same data and re-run
@ -201,25 +192,6 @@ class RecreateMixIn:
action=Highlander, action=Highlander,
help="select compression algorithm, see the output of the " '"borg help compression" command for details.', help="select compression algorithm, see the output of the " '"borg help compression" command for details.',
) )
archive_group.add_argument(
"--recompress",
metavar="MODE",
dest="recompress",
nargs="?",
default="never",
const="if-different",
choices=("never", "if-different", "always"),
action=Highlander,
help="recompress data chunks according to `MODE` and ``--compression``. "
"Possible modes are "
"`if-different`: recompress if current compression is with a different "
"compression algorithm or different level; "
"`always`: recompress unconditionally; and "
"`never`: do not recompress (use this option explicitly to prevent "
"recompression). "
"If no MODE is given, `if-different` will be used. "
'Not passing --recompress is equivalent to "--recompress never".',
)
archive_group.add_argument( archive_group.add_argument(
"--chunker-params", "--chunker-params",
metavar="PARAMS", metavar="PARAMS",

View File

@ -191,20 +191,6 @@ def test_recreate_no_rechunkify(archivers, request):
assert num_chunks == num_chunks_after_recreate assert num_chunks == num_chunks_after_recreate
def test_recreate_recompress(archivers, request):
archiver = request.getfixturevalue(archivers)
create_regular_file(archiver.input_path, "compressible", size=10000)
cmd(archiver, "rcreate", RK_ENCRYPTION)
cmd(archiver, "create", "test", "input", "-C", "none")
file_list = cmd(archiver, "list", "test", "input/compressible", "--format", "{size} {sha256}")
size, sha256_before = file_list.split(" ")
cmd(archiver, "recreate", "-C", "lz4", "--recompress")
check_cache(archiver)
file_list = cmd(archiver, "list", "test", "input/compressible", "--format", "{size} {sha256}")
size, sha256_after = file_list.split(" ")
assert sha256_before == sha256_after
def test_recreate_timestamp(archivers, request): def test_recreate_timestamp(archivers, request):
archiver = request.getfixturevalue(archivers) archiver = request.getfixturevalue(archivers)
create_test_files(archiver.input_path) create_test_files(archiver.input_path)