Merge pull request #7837 from ThomasWaldmann/remove-recreate-recompress

Remove recreate --recompress option
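In Borg 2, recompressing data that is already in the repository is handled by the standalone ``borg rcompress`` command; presumably that is why ``borg recreate`` no longer needs its own ``--recompress`` mode. This PR removes the option and all of its supporting plumbing.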
TW 2023-09-24 18:16:35 +02:00 committed by GitHub
commit 506718e82f
5 changed files with 9 additions and 80 deletions

View File

@@ -23,7 +23,7 @@ from . import xattr
 from .chunker import get_chunker, Chunk
 from .cache import ChunkListEntry
 from .crypto.key import key_factory, UnsupportedPayloadError
-from .compress import Compressor, CompressionSpec
+from .compress import CompressionSpec
 from .constants import *  # NOQA
 from .crypto.low_level import IntegrityError as IntegrityErrorBase
 from .hashindex import ChunkIndex, ChunkIndexEntry, CacheSynchronizer
@@ -2349,8 +2349,6 @@ class ArchiveRecreater:
         keep_exclude_tags=False,
         chunker_params=None,
         compression=None,
-        recompress=False,
-        always_recompress=False,
         dry_run=False,
         stats=False,
         progress=False,
@@ -2374,8 +2372,6 @@
         if self.rechunkify:
             logger.debug("Rechunking archives to %s", chunker_params)
         self.chunker_params = chunker_params or CHUNKER_PARAMS
-        self.recompress = recompress
-        self.always_recompress = always_recompress
         self.compression = compression or CompressionSpec("none")
         self.seen_chunks = set()
@@ -2393,13 +2389,7 @@
         target = self.create_target(archive, target_name)
         if self.exclude_if_present or self.exclude_caches:
             self.matcher_add_tagged_dirs(archive)
-        if (
-            self.matcher.empty()
-            and not self.recompress
-            and not target.recreate_rechunkify
-            and comment is None
-            and target_name is None
-        ):
+        if self.matcher.empty() and not target.recreate_rechunkify and comment is None and target_name is None:
             # nothing to do
             return False
         self.process_items(archive, target)
@@ -2432,7 +2422,7 @@
         self.print_file_status(status, item.path)

     def process_chunks(self, archive, target, item):
-        if not self.recompress and not target.recreate_rechunkify:
+        if not target.recreate_rechunkify:
             for chunk_id, size in item.chunks:
                 self.cache.chunk_incref(chunk_id, target.stats)
             return item.chunks
@@ -2444,19 +2434,7 @@
         chunk_id, data = cached_hash(chunk, self.key.id_hash)
         if chunk_id in self.seen_chunks:
             return self.cache.chunk_incref(chunk_id, target.stats)
-        overwrite = self.recompress
-        if self.recompress and not self.always_recompress and chunk_id in self.cache.chunks:
-            # Check if this chunk is already compressed the way we want it
-            old_meta = self.repo_objs.parse_meta(chunk_id, self.repository.get(chunk_id, read_data=False))
-            compr_hdr = bytes((old_meta["ctype"], old_meta["clevel"]))
-            compressor_cls, level = Compressor.detect(compr_hdr)
-            if (
-                compressor_cls.name == self.repo_objs.compressor.decide({}, data).name
-                and level == self.repo_objs.compressor.level
-            ):
-                # Stored chunk has the same compression method and level as we wanted
-                overwrite = False
-        chunk_entry = self.cache.add_chunk(chunk_id, {}, data, stats=target.stats, overwrite=overwrite, wait=False)
+        chunk_entry = self.cache.add_chunk(chunk_id, {}, data, stats=target.stats, wait=False)
         self.cache.repository.async_response(wait=False)
         self.seen_chunks.add(chunk_entry.id)
         return chunk_entry
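For reference, the deleted block implemented the old ``if-different`` mode: it rebuilt the two-byte compression header from the stored object's ``ctype``/``clevel`` metadata, asked ``Compressor.detect`` which compressor class and level produced it, and skipped the rewrite when both matched the currently configured compressor. A minimal sketch of that decision, reusing the internal calls exactly as they appear in the removed lines (``repo_objs`` and ``stored_meta`` are assumed to be borg's RepoObj instance and the parsed object metadata; the helper name is hypothetical):

from borg.compress import Compressor

def needs_recompression(stored_meta, data, repo_objs):
    # Reassemble the 2-byte compression header stored with the object.
    compr_hdr = bytes((stored_meta["ctype"], stored_meta["clevel"]))
    # detect() maps that header back to (compressor class, level).
    compressor_cls, level = Compressor.detect(compr_hdr)
    # Compare with what the configured compressor would pick for this data.
    wanted = repo_objs.compressor
    same = compressor_cls.name == wanted.decide({}, data).name and level == wanted.level
    return not same  # rewrite only if method or level differ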

View File

@@ -21,8 +21,6 @@ class RecreateMixIn:
         matcher = build_matcher(args.patterns, args.paths)
         self.output_list = args.output_list
         self.output_filter = args.output_filter
-        recompress = args.recompress != "never"
-        always_recompress = args.recompress == "always"

         recreater = ArchiveRecreater(
             manifest,
@@ -33,8 +31,6 @@
             keep_exclude_tags=args.keep_exclude_tags,
             chunker_params=args.chunker_params,
             compression=args.compression,
-            recompress=recompress,
-            always_recompress=always_recompress,
             progress=args.progress,
             stats=args.stats,
             file_status_printer=self.print_file_status,
@@ -81,11 +77,6 @@
         Note that all paths in an archive are relative, therefore absolute patterns/paths
         will *not* match (``--exclude``, ``--exclude-from``, PATHs).

-        ``--recompress`` allows one to change the compression of existing data in archives.
-        Due to how Borg stores compressed size information this might display
-        incorrect information for archives that were not recreated at the same time.
-        There is no risk of data loss by this.
-
         ``--chunker-params`` will re-chunk all files in the archive, this can be
         used to have upgraded Borg 0.xx archives deduplicate with Borg 1.x archives.
@@ -101,9 +92,9 @@
         With ``--target`` the original archive is not replaced, instead a new archive is created.

-        When rechunking (or recompressing), space usage can be substantial - expect
+        When rechunking, space usage can be substantial - expect
         at least the entire deduplicated size of the archives using the previous
-        chunker (or compression) params.
+        chunker params.

         If you recently ran borg check --repair and it had to fix lost chunks with all-zero
         replacement chunks, please first run another backup for the same data and re-run
@@ -201,25 +192,6 @@
             action=Highlander,
             help="select compression algorithm, see the output of the " '"borg help compression" command for details.',
         )
-        archive_group.add_argument(
-            "--recompress",
-            metavar="MODE",
-            dest="recompress",
-            nargs="?",
-            default="never",
-            const="if-different",
-            choices=("never", "if-different", "always"),
-            action=Highlander,
-            help="recompress data chunks according to `MODE` and ``--compression``. "
-            "Possible modes are "
-            "`if-different`: recompress if current compression is with a different "
-            "compression algorithm or different level; "
-            "`always`: recompress unconditionally; and "
-            "`never`: do not recompress (use this option explicitly to prevent "
-            "recompression). "
-            "If no MODE is given, `if-different` will be used. "
-            'Not passing --recompress is equivalent to "--recompress never".',
-        )
         archive_group.add_argument(
             "--chunker-params",
             metavar="PARAMS",

View File

@@ -939,15 +939,13 @@ class LocalCache(CacheStatsMixin):
         self.cache_config.ignored_features.update(repo_features - my_features)
         self.cache_config.mandatory_features.update(repo_features & my_features)

-    def add_chunk(
-        self, id, meta, data, *, stats, overwrite=False, wait=True, compress=True, size=None, ctype=None, clevel=None
-    ):
+    def add_chunk(self, id, meta, data, *, stats, wait=True, compress=True, size=None, ctype=None, clevel=None):
         if not self.txn_active:
             self.begin_txn()
         if size is None and compress:
             size = len(data)  # data is still uncompressed
         refcount = self.seen_chunk(id, size)
-        if refcount and not overwrite:
+        if refcount:
             return self.chunk_incref(id, stats)
         if size is None:
             raise ValueError("when giving compressed data for a new chunk, the uncompressed size must be given also")
@@ -1115,8 +1113,7 @@ Chunk index: {0.total_unique_chunks:20d} unknown"""
     def memorize_file(self, hashed_path, path_hash, st, ids):
         pass

-    def add_chunk(self, id, meta, data, *, stats, overwrite=False, wait=True, compress=True, size=None):
-        assert not overwrite, "AdHocCache does not permit overwrites — trying to use it for recreate?"
+    def add_chunk(self, id, meta, data, *, stats, wait=True, compress=True, size=None):
         if not self._txn_active:
             self.begin_txn()
         if size is None and compress:
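With ``overwrite`` gone, both cache implementations share one rule: an id that is already present is never rewritten, it only gets its reference count bumped. A toy model of that invariant (not borg's real cache classes, just the semantics):

class TinyChunkCache:
    def __init__(self):
        self.chunks = {}  # id -> [refcount, data]

    def add_chunk(self, id, data):
        entry = self.chunks.get(id)
        if entry is not None:  # already stored: just incref, never overwrite
            entry[0] += 1
            return id, entry[0]
        self.chunks[id] = [1, data]  # first sighting: store once
        return id, 1

cache = TinyChunkCache()
assert cache.add_chunk(b"id1", b"data") == (b"id1", 1)
assert cache.add_chunk(b"id1", b"ignored") == (b"id1", 2)  # deduplicated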

View File

@@ -191,20 +191,6 @@ def test_recreate_no_rechunkify(archivers, request):
     assert num_chunks == num_chunks_after_recreate


-def test_recreate_recompress(archivers, request):
-    archiver = request.getfixturevalue(archivers)
-    create_regular_file(archiver.input_path, "compressible", size=10000)
-    cmd(archiver, "rcreate", RK_ENCRYPTION)
-    cmd(archiver, "create", "test", "input", "-C", "none")
-    file_list = cmd(archiver, "list", "test", "input/compressible", "--format", "{size} {sha256}")
-    size, sha256_before = file_list.split(" ")
-    cmd(archiver, "recreate", "-C", "lz4", "--recompress")
-    check_cache(archiver)
-    file_list = cmd(archiver, "list", "test", "input/compressible", "--format", "{size} {sha256}")
-    size, sha256_after = file_list.split(" ")
-    assert sha256_before == sha256_after
-
-
 def test_recreate_timestamp(archivers, request):
     archiver = request.getfixturevalue(archivers)
     create_test_files(archiver.input_path)

View File

@@ -192,10 +192,6 @@ class TestAdHocCache:
         cache.chunk_decref(H(1), Statistics())
         assert repository.get(H(1)) == b"1234"

-    def test_does_not_overwrite(self, cache):
-        with pytest.raises(AssertionError):
-            cache.add_chunk(H(1), {}, b"5678", stats=Statistics(), overwrite=True)
-
     def test_seen_chunk_add_chunk_size(self, cache):
         assert cache.add_chunk(H(1), {}, b"5678", stats=Statistics()) == (H(1), 4)