Merge pull request #7837 from ThomasWaldmann/remove-recreate-recompress

Remove recreate --recompress option

Commit 506718e82f
5 changed files with 9 additions and 80 deletions
@@ -23,7 +23,7 @@
 from .chunker import get_chunker, Chunk
 from .cache import ChunkListEntry
 from .crypto.key import key_factory, UnsupportedPayloadError
-from .compress import Compressor, CompressionSpec
+from .compress import CompressionSpec
 from .constants import *  # NOQA
 from .crypto.low_level import IntegrityError as IntegrityErrorBase
 from .hashindex import ChunkIndex, ChunkIndexEntry, CacheSynchronizer
@@ -2349,8 +2349,6 @@ def __init__(
         keep_exclude_tags=False,
         chunker_params=None,
         compression=None,
-        recompress=False,
-        always_recompress=False,
         dry_run=False,
         stats=False,
         progress=False,
@@ -2374,8 +2372,6 @@ def __init__(
         if self.rechunkify:
             logger.debug("Rechunking archives to %s", chunker_params)
         self.chunker_params = chunker_params or CHUNKER_PARAMS
-        self.recompress = recompress
-        self.always_recompress = always_recompress
         self.compression = compression or CompressionSpec("none")
         self.seen_chunks = set()

@@ -2393,13 +2389,7 @@ def recreate(self, archive_name, comment=None, target_name=None):
         target = self.create_target(archive, target_name)
         if self.exclude_if_present or self.exclude_caches:
             self.matcher_add_tagged_dirs(archive)
-        if (
-            self.matcher.empty()
-            and not self.recompress
-            and not target.recreate_rechunkify
-            and comment is None
-            and target_name is None
-        ):
+        if self.matcher.empty() and not target.recreate_rechunkify and comment is None and target_name is None:
             # nothing to do
             return False
         self.process_items(archive, target)
@@ -2432,7 +2422,7 @@ def process_item(self, archive, target, item):
         self.print_file_status(status, item.path)

     def process_chunks(self, archive, target, item):
-        if not self.recompress and not target.recreate_rechunkify:
+        if not target.recreate_rechunkify:
             for chunk_id, size in item.chunks:
                 self.cache.chunk_incref(chunk_id, target.stats)
             return item.chunks
@@ -2444,19 +2434,7 @@ def chunk_processor(self, target, chunk):
         chunk_id, data = cached_hash(chunk, self.key.id_hash)
         if chunk_id in self.seen_chunks:
             return self.cache.chunk_incref(chunk_id, target.stats)
-        overwrite = self.recompress
-        if self.recompress and not self.always_recompress and chunk_id in self.cache.chunks:
-            # Check if this chunk is already compressed the way we want it
-            old_meta = self.repo_objs.parse_meta(chunk_id, self.repository.get(chunk_id, read_data=False))
-            compr_hdr = bytes((old_meta["ctype"], old_meta["clevel"]))
-            compressor_cls, level = Compressor.detect(compr_hdr)
-            if (
-                compressor_cls.name == self.repo_objs.compressor.decide({}, data).name
-                and level == self.repo_objs.compressor.level
-            ):
-                # Stored chunk has the same compression method and level as we wanted
-                overwrite = False
-        chunk_entry = self.cache.add_chunk(chunk_id, {}, data, stats=target.stats, overwrite=overwrite, wait=False)
+        chunk_entry = self.cache.add_chunk(chunk_id, {}, data, stats=target.stats, wait=False)
         self.cache.repository.async_response(wait=False)
         self.seen_chunks.add(chunk_entry.id)
         return chunk_entry
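The removed branch decided whether a chunk had to be rewritten by parsing the stored chunk's two compression-header bytes (ctype, clevel) and comparing them with the currently configured compressor. A minimal standalone sketch of that comparison; the function name and header layout below are illustrative stand-ins, not borg's actual API:

    # Hypothetical sketch of the removed "recompress only if different" check.
    # borg kept a chunk's compression type and level as two header bytes;
    # recompression was skipped when both already matched the target.
    def needs_recompression(stored_hdr: bytes, want_ctype: int, want_clevel: int) -> bool:
        stored_ctype, stored_clevel = stored_hdr[0], stored_hdr[1]
        return (stored_ctype, stored_clevel) != (want_ctype, want_clevel)

    # Chunk stored with algorithm id 1 at level 0; target is id 2 at level 3.
    assert needs_recompression(bytes((1, 0)), 2, 3)       # would have been overwritten
    assert not needs_recompression(bytes((2, 3)), 2, 3)   # would have been kept as-is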
@@ -21,8 +21,6 @@ def do_recreate(self, args, repository, manifest, cache):
         matcher = build_matcher(args.patterns, args.paths)
         self.output_list = args.output_list
         self.output_filter = args.output_filter
-        recompress = args.recompress != "never"
-        always_recompress = args.recompress == "always"

         recreater = ArchiveRecreater(
             manifest,
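The two deleted lines above are the complete mapping from the old MODE string to the recreater's flags; spelled out as a runnable table built from exactly those expressions:

    # never        -> recompress=False, always_recompress=False
    # if-different -> recompress=True,  always_recompress=False
    # always       -> recompress=True,  always_recompress=True
    for mode in ("never", "if-different", "always"):
        recompress = mode != "never"
        always_recompress = mode == "always"
        print(f"{mode:12s} recompress={recompress} always_recompress={always_recompress}")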
@@ -33,8 +31,6 @@ def do_recreate(self, args, repository, manifest, cache):
             keep_exclude_tags=args.keep_exclude_tags,
             chunker_params=args.chunker_params,
             compression=args.compression,
-            recompress=recompress,
-            always_recompress=always_recompress,
             progress=args.progress,
             stats=args.stats,
             file_status_printer=self.print_file_status,
@@ -81,11 +77,6 @@ def build_parser_recreate(self, subparsers, common_parser, mid_common_parser):
         Note that all paths in an archive are relative, therefore absolute patterns/paths
         will *not* match (``--exclude``, ``--exclude-from``, PATHs).

-        ``--recompress`` allows one to change the compression of existing data in archives.
-        Due to how Borg stores compressed size information this might display
-        incorrect information for archives that were not recreated at the same time.
-        There is no risk of data loss by this.
-
         ``--chunker-params`` will re-chunk all files in the archive, this can be
         used to have upgraded Borg 0.xx archives deduplicate with Borg 1.x archives.

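For reference, the deleted help paragraph corresponded to invocations like the first line below, which this PR makes invalid; rechunking remains available. The codec and chunker-params values are illustrative only, not prescribed by this change:

    borg recreate -C lz4 --recompress=if-different          # removed by this PR
    borg recreate --chunker-params=buzhash,19,23,21,4095    # rechunking still supported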
@@ -101,9 +92,9 @@ def build_parser_recreate(self, subparsers, common_parser, mid_common_parser):

         With ``--target`` the original archive is not replaced, instead a new archive is created.

-        When rechunking (or recompressing), space usage can be substantial - expect
+        When rechunking, space usage can be substantial - expect
         at least the entire deduplicated size of the archives using the previous
-        chunker (or compression) params.
+        chunker params.

         If you recently ran borg check --repair and it had to fix lost chunks with all-zero
         replacement chunks, please first run another backup for the same data and re-run
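A rough worked example of that space warning (numbers purely illustrative): chunks cut with new chunker params do not deduplicate against the old ones, so until the original archives are deleted the repository holds both generations of data:

    old_dedup_size_gb = 500                   # deduplicated size under old chunker params
    extra_gb = old_dedup_size_gb              # "at least the entire deduplicated size"
    peak_gb = old_dedup_size_gb + extra_gb
    print(f"expect peak repository usage around {peak_gb} GB")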
@@ -201,25 +192,6 @@ def build_parser_recreate(self, subparsers, common_parser, mid_common_parser):
             action=Highlander,
             help="select compression algorithm, see the output of the " '"borg help compression" command for details.',
         )
-        archive_group.add_argument(
-            "--recompress",
-            metavar="MODE",
-            dest="recompress",
-            nargs="?",
-            default="never",
-            const="if-different",
-            choices=("never", "if-different", "always"),
-            action=Highlander,
-            help="recompress data chunks according to `MODE` and ``--compression``. "
-            "Possible modes are "
-            "`if-different`: recompress if current compression is with a different "
-            "compression algorithm or different level; "
-            "`always`: recompress unconditionally; and "
-            "`never`: do not recompress (use this option explicitly to prevent "
-            "recompression). "
-            "If no MODE is given, `if-different` will be used. "
-            'Not passing --recompress is equivalent to "--recompress never".',
-        )
         archive_group.add_argument(
             "--chunker-params",
             metavar="PARAMS",
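The deleted option relied on argparse's nargs="?" plus const pattern, which is why a bare --recompress meant "if-different" while omitting the flag meant "never". A standalone reproduction of that behavior using only the standard library (no borg code involved):

    import argparse

    p = argparse.ArgumentParser()
    p.add_argument(
        "--recompress",
        nargs="?",                 # flag may appear with or without a value
        default="never",           # used when the flag is absent entirely
        const="if-different",      # used when the flag is present but bare
        choices=("never", "if-different", "always"),
    )
    assert p.parse_args([]).recompress == "never"
    assert p.parse_args(["--recompress"]).recompress == "if-different"
    assert p.parse_args(["--recompress", "always"]).recompress == "always"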
@@ -939,15 +939,13 @@ def update_compatibility(self):
         self.cache_config.ignored_features.update(repo_features - my_features)
         self.cache_config.mandatory_features.update(repo_features & my_features)

-    def add_chunk(
-        self, id, meta, data, *, stats, overwrite=False, wait=True, compress=True, size=None, ctype=None, clevel=None
-    ):
+    def add_chunk(self, id, meta, data, *, stats, wait=True, compress=True, size=None, ctype=None, clevel=None):
         if not self.txn_active:
             self.begin_txn()
         if size is None and compress:
             size = len(data)  # data is still uncompressed
         refcount = self.seen_chunk(id, size)
-        if refcount and not overwrite:
+        if refcount:
             return self.chunk_incref(id, stats)
         if size is None:
             raise ValueError("when giving compressed data for a new chunk, the uncompressed size must be given also")
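With overwrite gone, add_chunk is left with a single dedup rule: an id that is already known is only reference-counted, never re-stored. A toy model of that behavior (TinyChunkCache is a hypothetical class for illustration, not borg's Cache):

    class TinyChunkCache:
        """Toy model: refcounted, content-addressed chunk store."""
        def __init__(self):
            self.refcounts = {}   # chunk id -> reference count
            self.store = {}       # chunk id -> stored data

        def add_chunk(self, id, data):
            if id in self.refcounts:            # was: "if refcount and not overwrite"
                self.refcounts[id] += 1         # dedup hit: just incref, no write
                return id, len(self.store[id])
            self.store[id] = data
            self.refcounts[id] = 1
            return id, len(data)

    cache = TinyChunkCache()
    assert cache.add_chunk(b"h1", b"1234") == (b"h1", 4)
    assert cache.add_chunk(b"h1", b"5678") == (b"h1", 4)  # new data ignored
    assert cache.refcounts[b"h1"] == 2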
@@ -1115,8 +1113,7 @@ def file_known_and_unchanged(self, hashed_path, path_hash, st):
     def memorize_file(self, hashed_path, path_hash, st, ids):
         pass

-    def add_chunk(self, id, meta, data, *, stats, overwrite=False, wait=True, compress=True, size=None):
-        assert not overwrite, "AdHocCache does not permit overwrites — trying to use it for recreate?"
+    def add_chunk(self, id, meta, data, *, stats, wait=True, compress=True, size=None):
         if not self._txn_active:
             self.begin_txn()
         if size is None and compress:
@@ -191,20 +191,6 @@ def test_recreate_no_rechunkify(archivers, request):
     assert num_chunks == num_chunks_after_recreate


-def test_recreate_recompress(archivers, request):
-    archiver = request.getfixturevalue(archivers)
-    create_regular_file(archiver.input_path, "compressible", size=10000)
-    cmd(archiver, "rcreate", RK_ENCRYPTION)
-    cmd(archiver, "create", "test", "input", "-C", "none")
-    file_list = cmd(archiver, "list", "test", "input/compressible", "--format", "{size} {sha256}")
-    size, sha256_before = file_list.split(" ")
-    cmd(archiver, "recreate", "-C", "lz4", "--recompress")
-    check_cache(archiver)
-    file_list = cmd(archiver, "list", "test", "input/compressible", "--format", "{size} {sha256}")
-    size, sha256_after = file_list.split(" ")
-    assert sha256_before == sha256_after
-
-
 def test_recreate_timestamp(archivers, request):
     archiver = request.getfixturevalue(archivers)
     create_test_files(archiver.input_path)
@@ -192,10 +192,6 @@ def test_does_not_delete_existing_chunks(self, repository, cache):
         cache.chunk_decref(H(1), Statistics())
         assert repository.get(H(1)) == b"1234"

-    def test_does_not_overwrite(self, cache):
-        with pytest.raises(AssertionError):
-            cache.add_chunk(H(1), {}, b"5678", stats=Statistics(), overwrite=True)
-
     def test_seen_chunk_add_chunk_size(self, cache):
         assert cache.add_chunk(H(1), {}, b"5678", stats=Statistics()) == (H(1), 4)