mirror of
https://github.com/borgbackup/borg.git
synced 2024-12-23 08:16:54 +00:00
Merge pull request #3230 from ThomasWaldmann/chunks-healthy-recreate-fix
recreate / chunks_healthy fixes
This commit is contained in:
commit
fcef52a5d8
2 changed files with 30 additions and 7 deletions
|
@ -967,13 +967,14 @@ class ChunksProcessor:
|
|||
|
||||
def __init__(self, *, key, cache,
|
||||
add_item, write_checkpoint,
|
||||
checkpoint_interval):
|
||||
checkpoint_interval, rechunkify):
|
||||
self.key = key
|
||||
self.cache = cache
|
||||
self.add_item = add_item
|
||||
self.write_checkpoint = write_checkpoint
|
||||
self.checkpoint_interval = checkpoint_interval
|
||||
self.last_checkpoint = time.monotonic()
|
||||
self.rechunkify = rechunkify
|
||||
|
||||
def write_part_file(self, item, from_chunk, number):
|
||||
item = Item(internal_dict=item.as_dict())
|
||||
|
@ -998,6 +999,10 @@ def chunk_processor(data):
|
|||
return chunk_entry
|
||||
|
||||
item.chunks = []
|
||||
# if we rechunkify, we'll get a fundamentally different chunks list, thus we need
|
||||
# to get rid of .chunks_healthy, as it might not correspond to .chunks any more.
|
||||
if self.rechunkify and 'chunks_healthy' in item:
|
||||
del item.chunks_healthy
|
||||
from_chunk = 0
|
||||
part_number = 1
|
||||
for data in chunk_iter:
|
||||
|
@ -1502,7 +1507,12 @@ def replacement_chunk(size):
|
|||
has_chunks_healthy = 'chunks_healthy' in item
|
||||
chunks_current = item.chunks
|
||||
chunks_healthy = item.chunks_healthy if has_chunks_healthy else chunks_current
|
||||
assert len(chunks_current) == len(chunks_healthy)
|
||||
if has_chunks_healthy and len(chunks_current) != len(chunks_healthy):
|
||||
# should never happen, but it did (issue #3218): len(chunks) != len(chunks_healthy).
|
||||
logger.warning('{}: Invalid chunks_healthy metadata removed!'.format(item.path))
|
||||
del item.chunks_healthy
|
||||
has_chunks_healthy = False
|
||||
chunks_healthy = chunks_current
|
||||
for chunk_current, chunk_healthy in zip(chunks_current, chunks_healthy):
|
||||
chunk_id, size, csize = chunk_healthy
|
||||
if chunk_id not in self.chunks:
|
||||
|
@ -1758,15 +1768,17 @@ def item_is_hardlink_master(item):
|
|||
if not matcher.match(item.path):
|
||||
self.print_file_status('x', item.path)
|
||||
if item_is_hardlink_master(item):
|
||||
hardlink_masters[item.path] = (item.get('chunks'), None)
|
||||
hardlink_masters[item.path] = (item.get('chunks'), item.get('chunks_healthy'), None)
|
||||
continue
|
||||
if target_is_subset and hardlinkable(item.mode) and item.get('source') in hardlink_masters:
|
||||
# master of this hard link is outside the target subset
|
||||
chunks, new_source = hardlink_masters[item.source]
|
||||
chunks, chunks_healthy, new_source = hardlink_masters[item.source]
|
||||
if new_source is None:
|
||||
# First item to use this master, move the chunks
|
||||
item.chunks = chunks
|
||||
hardlink_masters[item.source] = (None, item.path)
|
||||
if chunks_healthy is not None:
|
||||
item.chunks_healthy = chunks_healthy
|
||||
hardlink_masters[item.source] = (None, None, item.path)
|
||||
del item.source
|
||||
else:
|
||||
# Master was already moved, only update this item's source
|
||||
|
@ -1891,7 +1903,7 @@ def create_target(self, archive, target_name=None):
|
|||
target.process_file_chunks = ChunksProcessor(
|
||||
cache=self.cache, key=self.key,
|
||||
add_item=target.add_item, write_checkpoint=target.write_checkpoint,
|
||||
checkpoint_interval=self.checkpoint_interval).process_file_chunks
|
||||
checkpoint_interval=self.checkpoint_interval, rechunkify=target.recreate_rechunkify).process_file_chunks
|
||||
target.chunker = Chunker(self.key.chunk_seed, *target.chunker_params)
|
||||
return target
|
||||
|
||||
|
|
|
@ -519,7 +519,7 @@ def create_inner(archive, cache, fso):
|
|||
nobsdflags=args.nobsdflags, numeric_owner=args.numeric_owner)
|
||||
cp = ChunksProcessor(cache=cache, key=key,
|
||||
add_item=archive.add_item, write_checkpoint=archive.write_checkpoint,
|
||||
checkpoint_interval=args.checkpoint_interval)
|
||||
checkpoint_interval=args.checkpoint_interval, rechunkify=False)
|
||||
fso = FilesystemObjectProcessors(metadata_collector=metadata_collector, cache=cache, key=key,
|
||||
process_file_chunks=cp.process_file_chunks, add_item=archive.add_item,
|
||||
chunker_params=args.chunker_params)
|
||||
|
@ -3371,6 +3371,17 @@ def define_archive_filters_group(subparser, *, sort_by=True, first_last=True):
|
|||
deduplicated size of the archives using the previous chunker params.
|
||||
When recompressing expect approx. (throughput / checkpoint-interval) in space usage,
|
||||
assuming all chunks are recompressed.
|
||||
|
||||
If you recently ran borg check --repair and it had to fix lost chunks with all-zero
|
||||
replacement chunks, please first run another backup for the same data and re-run
|
||||
borg check --repair afterwards to heal any archives that had lost chunks which are
|
||||
still generated from the input data.
|
||||
|
||||
Important: running borg recreate to re-chunk will remove the chunks_healthy
|
||||
metadata of all items with replacement chunks, so healing will not be possible
|
||||
any more after re-chunking (it is also unlikely it would ever work: due to the
|
||||
change of chunking parameters, the missing chunk likely will never be seen again
|
||||
even if you still have the data that produced it).
|
||||
""")
|
||||
subparser = subparsers.add_parser('recreate', parents=[common_parser], add_help=False,
|
||||
description=self.do_recreate.__doc__,
|
||||
|
|
Loading…
Reference in a new issue