1
0
Fork 0
mirror of https://github.com/borgbackup/borg.git synced 2025-01-02 21:25:26 +00:00

Merge pull request #7837 from ThomasWaldmann/remove-recreate-recompress

Remove recreate --recompress option
This commit is contained in:
TW 2023-09-24 18:16:35 +02:00 committed by GitHub
commit 506718e82f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 9 additions and 80 deletions

View file

@ -23,7 +23,7 @@
from .chunker import get_chunker, Chunk from .chunker import get_chunker, Chunk
from .cache import ChunkListEntry from .cache import ChunkListEntry
from .crypto.key import key_factory, UnsupportedPayloadError from .crypto.key import key_factory, UnsupportedPayloadError
from .compress import Compressor, CompressionSpec from .compress import CompressionSpec
from .constants import * # NOQA from .constants import * # NOQA
from .crypto.low_level import IntegrityError as IntegrityErrorBase from .crypto.low_level import IntegrityError as IntegrityErrorBase
from .hashindex import ChunkIndex, ChunkIndexEntry, CacheSynchronizer from .hashindex import ChunkIndex, ChunkIndexEntry, CacheSynchronizer
@ -2349,8 +2349,6 @@ def __init__(
keep_exclude_tags=False, keep_exclude_tags=False,
chunker_params=None, chunker_params=None,
compression=None, compression=None,
recompress=False,
always_recompress=False,
dry_run=False, dry_run=False,
stats=False, stats=False,
progress=False, progress=False,
@ -2374,8 +2372,6 @@ def __init__(
if self.rechunkify: if self.rechunkify:
logger.debug("Rechunking archives to %s", chunker_params) logger.debug("Rechunking archives to %s", chunker_params)
self.chunker_params = chunker_params or CHUNKER_PARAMS self.chunker_params = chunker_params or CHUNKER_PARAMS
self.recompress = recompress
self.always_recompress = always_recompress
self.compression = compression or CompressionSpec("none") self.compression = compression or CompressionSpec("none")
self.seen_chunks = set() self.seen_chunks = set()
@ -2393,13 +2389,7 @@ def recreate(self, archive_name, comment=None, target_name=None):
target = self.create_target(archive, target_name) target = self.create_target(archive, target_name)
if self.exclude_if_present or self.exclude_caches: if self.exclude_if_present or self.exclude_caches:
self.matcher_add_tagged_dirs(archive) self.matcher_add_tagged_dirs(archive)
if ( if self.matcher.empty() and not target.recreate_rechunkify and comment is None and target_name is None:
self.matcher.empty()
and not self.recompress
and not target.recreate_rechunkify
and comment is None
and target_name is None
):
# nothing to do # nothing to do
return False return False
self.process_items(archive, target) self.process_items(archive, target)
@ -2432,7 +2422,7 @@ def process_item(self, archive, target, item):
self.print_file_status(status, item.path) self.print_file_status(status, item.path)
def process_chunks(self, archive, target, item): def process_chunks(self, archive, target, item):
if not self.recompress and not target.recreate_rechunkify: if not target.recreate_rechunkify:
for chunk_id, size in item.chunks: for chunk_id, size in item.chunks:
self.cache.chunk_incref(chunk_id, target.stats) self.cache.chunk_incref(chunk_id, target.stats)
return item.chunks return item.chunks
@ -2444,19 +2434,7 @@ def chunk_processor(self, target, chunk):
chunk_id, data = cached_hash(chunk, self.key.id_hash) chunk_id, data = cached_hash(chunk, self.key.id_hash)
if chunk_id in self.seen_chunks: if chunk_id in self.seen_chunks:
return self.cache.chunk_incref(chunk_id, target.stats) return self.cache.chunk_incref(chunk_id, target.stats)
overwrite = self.recompress chunk_entry = self.cache.add_chunk(chunk_id, {}, data, stats=target.stats, wait=False)
if self.recompress and not self.always_recompress and chunk_id in self.cache.chunks:
# Check if this chunk is already compressed the way we want it
old_meta = self.repo_objs.parse_meta(chunk_id, self.repository.get(chunk_id, read_data=False))
compr_hdr = bytes((old_meta["ctype"], old_meta["clevel"]))
compressor_cls, level = Compressor.detect(compr_hdr)
if (
compressor_cls.name == self.repo_objs.compressor.decide({}, data).name
and level == self.repo_objs.compressor.level
):
# Stored chunk has the same compression method and level as we wanted
overwrite = False
chunk_entry = self.cache.add_chunk(chunk_id, {}, data, stats=target.stats, overwrite=overwrite, wait=False)
self.cache.repository.async_response(wait=False) self.cache.repository.async_response(wait=False)
self.seen_chunks.add(chunk_entry.id) self.seen_chunks.add(chunk_entry.id)
return chunk_entry return chunk_entry

View file

@ -21,8 +21,6 @@ def do_recreate(self, args, repository, manifest, cache):
matcher = build_matcher(args.patterns, args.paths) matcher = build_matcher(args.patterns, args.paths)
self.output_list = args.output_list self.output_list = args.output_list
self.output_filter = args.output_filter self.output_filter = args.output_filter
recompress = args.recompress != "never"
always_recompress = args.recompress == "always"
recreater = ArchiveRecreater( recreater = ArchiveRecreater(
manifest, manifest,
@ -33,8 +31,6 @@ def do_recreate(self, args, repository, manifest, cache):
keep_exclude_tags=args.keep_exclude_tags, keep_exclude_tags=args.keep_exclude_tags,
chunker_params=args.chunker_params, chunker_params=args.chunker_params,
compression=args.compression, compression=args.compression,
recompress=recompress,
always_recompress=always_recompress,
progress=args.progress, progress=args.progress,
stats=args.stats, stats=args.stats,
file_status_printer=self.print_file_status, file_status_printer=self.print_file_status,
@ -81,11 +77,6 @@ def build_parser_recreate(self, subparsers, common_parser, mid_common_parser):
Note that all paths in an archive are relative, therefore absolute patterns/paths Note that all paths in an archive are relative, therefore absolute patterns/paths
will *not* match (``--exclude``, ``--exclude-from``, PATHs). will *not* match (``--exclude``, ``--exclude-from``, PATHs).
``--recompress`` allows one to change the compression of existing data in archives.
Due to how Borg stores compressed size information this might display
incorrect information for archives that were not recreated at the same time.
There is no risk of data loss by this.
``--chunker-params`` will re-chunk all files in the archive, this can be ``--chunker-params`` will re-chunk all files in the archive, this can be
used to have upgraded Borg 0.xx archives deduplicate with Borg 1.x archives. used to have upgraded Borg 0.xx archives deduplicate with Borg 1.x archives.
@ -101,9 +92,9 @@ def build_parser_recreate(self, subparsers, common_parser, mid_common_parser):
With ``--target`` the original archive is not replaced, instead a new archive is created. With ``--target`` the original archive is not replaced, instead a new archive is created.
When rechunking (or recompressing), space usage can be substantial - expect When rechunking, space usage can be substantial - expect
at least the entire deduplicated size of the archives using the previous at least the entire deduplicated size of the archives using the previous
chunker (or compression) params. chunker params.
If you recently ran borg check --repair and it had to fix lost chunks with all-zero If you recently ran borg check --repair and it had to fix lost chunks with all-zero
replacement chunks, please first run another backup for the same data and re-run replacement chunks, please first run another backup for the same data and re-run
@ -201,25 +192,6 @@ def build_parser_recreate(self, subparsers, common_parser, mid_common_parser):
action=Highlander, action=Highlander,
help="select compression algorithm, see the output of the " '"borg help compression" command for details.', help="select compression algorithm, see the output of the " '"borg help compression" command for details.',
) )
archive_group.add_argument(
"--recompress",
metavar="MODE",
dest="recompress",
nargs="?",
default="never",
const="if-different",
choices=("never", "if-different", "always"),
action=Highlander,
help="recompress data chunks according to `MODE` and ``--compression``. "
"Possible modes are "
"`if-different`: recompress if current compression is with a different "
"compression algorithm or different level; "
"`always`: recompress unconditionally; and "
"`never`: do not recompress (use this option explicitly to prevent "
"recompression). "
"If no MODE is given, `if-different` will be used. "
'Not passing --recompress is equivalent to "--recompress never".',
)
archive_group.add_argument( archive_group.add_argument(
"--chunker-params", "--chunker-params",
metavar="PARAMS", metavar="PARAMS",

View file

@ -939,15 +939,13 @@ def update_compatibility(self):
self.cache_config.ignored_features.update(repo_features - my_features) self.cache_config.ignored_features.update(repo_features - my_features)
self.cache_config.mandatory_features.update(repo_features & my_features) self.cache_config.mandatory_features.update(repo_features & my_features)
def add_chunk( def add_chunk(self, id, meta, data, *, stats, wait=True, compress=True, size=None, ctype=None, clevel=None):
self, id, meta, data, *, stats, overwrite=False, wait=True, compress=True, size=None, ctype=None, clevel=None
):
if not self.txn_active: if not self.txn_active:
self.begin_txn() self.begin_txn()
if size is None and compress: if size is None and compress:
size = len(data) # data is still uncompressed size = len(data) # data is still uncompressed
refcount = self.seen_chunk(id, size) refcount = self.seen_chunk(id, size)
if refcount and not overwrite: if refcount:
return self.chunk_incref(id, stats) return self.chunk_incref(id, stats)
if size is None: if size is None:
raise ValueError("when giving compressed data for a new chunk, the uncompressed size must be given also") raise ValueError("when giving compressed data for a new chunk, the uncompressed size must be given also")
@ -1115,8 +1113,7 @@ def file_known_and_unchanged(self, hashed_path, path_hash, st):
def memorize_file(self, hashed_path, path_hash, st, ids): def memorize_file(self, hashed_path, path_hash, st, ids):
pass pass
def add_chunk(self, id, meta, data, *, stats, overwrite=False, wait=True, compress=True, size=None): def add_chunk(self, id, meta, data, *, stats, wait=True, compress=True, size=None):
assert not overwrite, "AdHocCache does not permit overwrites — trying to use it for recreate?"
if not self._txn_active: if not self._txn_active:
self.begin_txn() self.begin_txn()
if size is None and compress: if size is None and compress:

View file

@ -191,20 +191,6 @@ def test_recreate_no_rechunkify(archivers, request):
assert num_chunks == num_chunks_after_recreate assert num_chunks == num_chunks_after_recreate
def test_recreate_recompress(archivers, request):
archiver = request.getfixturevalue(archivers)
create_regular_file(archiver.input_path, "compressible", size=10000)
cmd(archiver, "rcreate", RK_ENCRYPTION)
cmd(archiver, "create", "test", "input", "-C", "none")
file_list = cmd(archiver, "list", "test", "input/compressible", "--format", "{size} {sha256}")
size, sha256_before = file_list.split(" ")
cmd(archiver, "recreate", "-C", "lz4", "--recompress")
check_cache(archiver)
file_list = cmd(archiver, "list", "test", "input/compressible", "--format", "{size} {sha256}")
size, sha256_after = file_list.split(" ")
assert sha256_before == sha256_after
def test_recreate_timestamp(archivers, request): def test_recreate_timestamp(archivers, request):
archiver = request.getfixturevalue(archivers) archiver = request.getfixturevalue(archivers)
create_test_files(archiver.input_path) create_test_files(archiver.input_path)

View file

@ -192,10 +192,6 @@ def test_does_not_delete_existing_chunks(self, repository, cache):
cache.chunk_decref(H(1), Statistics()) cache.chunk_decref(H(1), Statistics())
assert repository.get(H(1)) == b"1234" assert repository.get(H(1)) == b"1234"
def test_does_not_overwrite(self, cache):
with pytest.raises(AssertionError):
cache.add_chunk(H(1), {}, b"5678", stats=Statistics(), overwrite=True)
def test_seen_chunk_add_chunk_size(self, cache): def test_seen_chunk_add_chunk_size(self, cache):
assert cache.add_chunk(H(1), {}, b"5678", stats=Statistics()) == (H(1), 4) assert cache.add_chunk(H(1), {}, b"5678", stats=Statistics()) == (H(1), 4)