mirror of https://github.com/borgbackup/borg.git
commit e9d7f928e2
@@ -1394,10 +1394,6 @@ class ArchiveChecker:
 
 
 class ArchiveRecreater:
-    class FakeTargetArchive:
-        def __init__(self):
-            self.stats = Statistics()
-
     class Interrupted(Exception):
         def __init__(self, metadata=None):
             self.metadata = metadata or {}
@@ -1421,6 +1417,9 @@ class ArchiveRecreater:
         self.exclude_if_present = exclude_if_present or []
         self.keep_tag_files = keep_tag_files
 
+        self.rechunkify = chunker_params is not None
+        if self.rechunkify:
+            logger.debug('Rechunking archives to %s', chunker_params)
         self.chunker_params = chunker_params or CHUNKER_PARAMS
         self.recompress = bool(compression)
         self.always_recompress = always_recompress
@@ -1434,7 +1433,7 @@ class ArchiveRecreater:
         self.stats = stats
         self.progress = progress
         self.print_file_status = file_status_printer or (lambda *args: None)
-        self.checkpoint_interval = checkpoint_interval
+        self.checkpoint_interval = None if dry_run else checkpoint_interval
 
     def recreate(self, archive_name, comment=None, target_name=None):
         assert not self.is_temporary_archive(archive_name)
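Note on the checkpoint_interval change above: a dry run now stores None, which presumably disables checkpointing outright instead of threading a dry_run flag through the checkpoint path. A minimal sketch of how a None interval can gate checkpoints; the helper and its callback are hypothetical, not borg's code:

    import time

    def maybe_checkpoint(last_checkpoint, interval, write_checkpoint):
        # interval=None (dry run): never checkpoint
        if interval is None:
            return last_checkpoint
        now = time.monotonic()
        if now - last_checkpoint >= interval:
            write_checkpoint()  # caller-supplied callback that saves a checkpoint archive
            return now
        return last_checkpoint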
@@ -1444,10 +1443,10 @@ class ArchiveRecreater:
         self.matcher_add_tagged_dirs(archive)
         if self.matcher.empty() and not self.recompress and not target.recreate_rechunkify and comment is None:
             logger.info("Skipping archive %s, nothing to do", archive_name)
-            return True
+            return
         self.process_items(archive, target)
         replace_original = target_name is None
-        return self.save(archive, target, comment, replace_original=replace_original)
+        self.save(archive, target, comment, replace_original=replace_original)
 
     def process_items(self, archive, target):
         matcher = self.matcher
@@ -1494,12 +1493,11 @@ class ArchiveRecreater:
             self.print_file_status(file_status(item.mode), item.path)
 
     def process_chunks(self, archive, target, item):
-        """Return new chunk ID list for 'item'."""
         if not self.recompress and not target.recreate_rechunkify:
             for chunk_id, size, csize in item.chunks:
                 self.cache.chunk_incref(chunk_id, target.stats)
             return item.chunks
-        chunk_iterator = self.create_chunk_iterator(archive, target, list(item.chunks))
+        chunk_iterator = self.iter_chunks(archive, target, list(item.chunks))
         compress = self.compression_decider1.decide(item.path)
         chunk_processor = partial(self.chunk_processor, target, compress)
         target.chunk_file(item, self.cache, target.stats, chunk_iterator, chunk_processor)
@@ -1517,24 +1515,22 @@ class ArchiveRecreater:
             if Compressor.detect(old_chunk.data).name == compression_spec['name']:
                 # Stored chunk has the same compression we wanted
                 overwrite = False
-        chunk_id, size, csize = self.cache.add_chunk(chunk_id, chunk, target.stats, overwrite=overwrite)
-        self.seen_chunks.add(chunk_id)
-        return chunk_id, size, csize
+        chunk_entry = self.cache.add_chunk(chunk_id, chunk, target.stats, overwrite=overwrite)
+        self.seen_chunks.add(chunk_entry.id)
+        return chunk_entry
 
-    def create_chunk_iterator(self, archive, target, chunks):
-        """Return iterator of chunks to store for 'item' from 'archive' in 'target'."""
+    def iter_chunks(self, archive, target, chunks):
         chunk_iterator = archive.pipeline.fetch_many([chunk_id for chunk_id, _, _ in chunks])
         if target.recreate_rechunkify:
             # The target.chunker will read the file contents through ChunkIteratorFileWrapper chunk-by-chunk
             # (does not load the entire file into memory)
             file = ChunkIteratorFileWrapper(chunk_iterator)
-            return target.chunker.chunkify(file)
+            yield from target.chunker.chunkify(file)
         else:
             for chunk in chunk_iterator:
                 yield chunk.data
 
     def save(self, archive, target, comment=None, replace_original=True):
-        """Save target archive. If completed, replace source. If not, save temporary with additional 'metadata' dict."""
         if self.dry_run:
             return
         timestamp = archive.ts.replace(tzinfo=None)
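Two details in the hunk above deserve a note. First, cache.add_chunk evidently returns an entry object exposing .id (a namedtuple-style record) rather than a bare (chunk_id, size, csize) tuple, so call sites that still unpack three values keep working. Second, the rename to iter_chunks ships an actual fix: because the function body contains a yield in its else branch, Python treats the whole function as a generator, and the old 'return target.chunker.chunkify(file)' silently discarded the chunkify iterator (its value only lands on StopIteration). 'yield from' delegates properly. A self-contained demonstration of the pitfall:

    def broken():
        if True:
            return iter([1, 2, 3])  # discarded: the yield below makes this a generator
        yield None                  # unreachable, but turns the function into a generator

    def fixed():
        if True:
            yield from [1, 2, 3]    # delegates every element to the caller
        else:
            yield None

    print(list(broken()))  # [] -- the returned iterator was thrown away
    print(list(fixed()))   # [1, 2, 3]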
@@ -1591,12 +1587,13 @@ class ArchiveRecreater:
 
     def create_target(self, archive, target_name=None):
         """Create target archive."""
-        if self.dry_run:
-            return self.FakeTargetArchive(), None
         target_name = target_name or archive.name + '.recreate'
         target = self.create_target_archive(target_name)
         # If the archives use the same chunker params, then don't rechunkify
-        target.recreate_rechunkify = tuple(archive.metadata.get('chunker_params', [])) != self.chunker_params
+        source_chunker_params = tuple(archive.metadata.get('chunker_params', []))
+        target.recreate_rechunkify = self.rechunkify and source_chunker_params != target.chunker_params
+        if target.recreate_rechunkify:
+            logger.debug('Rechunking archive from %s to %s', source_chunker_params or '(unknown)', target.chunker_params)
         return target
 
     def create_target_archive(self, name):
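The rechunkify decision above is now two-fold: rechunking must have been requested (--chunker-params given, recorded in self.rechunkify) and the source archive's stored params must differ from the target's. A small sketch of the predicate; the parameter tuples are illustrative (chunk_min_exp, chunk_max_exp, hash_mask_bits, hash_window_size) values, not authoritative defaults:

    def needs_rechunk(requested, source_params, target_params):
        # Rechunk only if asked for AND the params actually differ.
        # Unknown source params (empty) always count as different.
        return requested and tuple(source_params or ()) != tuple(target_params)

    attic_style = (10, 23, 16, 4095)  # e.g. an upgraded Attic/Borg 0.xx archive
    borg1_style = (19, 23, 21, 4095)  # e.g. Borg 1.x defaults

    assert needs_rechunk(True, attic_style, borg1_style)
    assert not needs_rechunk(True, borg1_style, borg1_style)   # same params: skip
    assert not needs_rechunk(False, attic_style, borg1_style)  # not requested: skip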
@@ -1101,11 +1101,11 @@ class Archiver:
             if recreater.is_temporary_archive(name):
                 continue
             print('Processing', name)
-            if not recreater.recreate(name, args.comment):
-                break
-        manifest.write()
-        repository.commit()
-        cache.commit()
+            recreater.recreate(name, args.comment)
+        if not args.dry_run:
+            manifest.write()
+            repository.commit()
+            cache.commit()
         return self.exit_code
 
     @with_repository(manifest=False, exclusive=True)
@@ -2356,6 +2356,8 @@ class Archiver:
         recreate_epilog = textwrap.dedent("""
         Recreate the contents of existing archives.
 
+        This is an *experimental* feature. Do *not* use this on your only backup.
+
         --exclude, --exclude-from and PATH have the exact same semantics
         as in "borg create". If PATHs are specified the resulting archive
         will only contain files from these PATHs.
@@ -2372,15 +2374,6 @@ class Archiver:
         used to have upgraded Borg 0.xx or Attic archives deduplicate with
         Borg 1.x archives.
 
-        borg recreate is signal safe. Send either SIGINT (Ctrl-C on most terminals) or
-        SIGTERM to request termination.
-
-        Use the *exact same* command line to resume the operation later - changing excludes
-        or paths will lead to inconsistencies (changed excludes will only apply to newly
-        processed files/dirs). Changing compression leads to incorrect size information
-        (which does not cause any data loss, but can be misleading).
-        Changing chunker params between invocations might lead to data loss.
-
         USE WITH CAUTION.
         Depending on the PATHs and patterns given, recreate can be used to permanently
         delete files from archives.
@@ -2395,8 +2388,8 @@ class Archiver:
 
         When rechunking space usage can be substantial, expect at least the entire
         deduplicated size of the archives using the previous chunker params.
-        When recompressing approximately 1 % of the repository size or 512 MB
-        (whichever is greater) of additional space is used.
+        When recompressing expect approx. (throughput / checkpoint-interval) in space usage,
+        assuming all chunks are recompressed.
         """)
         subparser = subparsers.add_parser('recreate', parents=[common_parser], add_help=False,
                                           description=self.do_recreate.__doc__,
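The reworded space estimate reads most naturally as the amount of data recompressed between two checkpoints, i.e. throughput multiplied by the checkpoint interval: that much newly written data can pile up before a checkpoint lets the superseded chunks be freed. A back-of-envelope with entirely made-up numbers:

    # Both figures are assumptions for illustration only.
    throughput_mb_per_s = 50       # sustained recompression throughput
    checkpoint_interval_s = 300    # --checkpoint-interval, in seconds

    extra_space_mb = throughput_mb_per_s * checkpoint_interval_s
    print('transient extra space: ~%.1f GB' % (extra_space_mb / 1024))  # ~14.6 GB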
@@ -1823,6 +1823,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.cmd('recreate', self.repository_location, '--chunker-params', 'default')
         self.check_cache()
         # test1 and test2 do deduplicate after recreate
+        assert int(self.cmd('list', self.repository_location + '::test1', 'input/large_file', '--format={size}'))
         assert not int(self.cmd('list', self.repository_location + '::test1', 'input/large_file',
                                 '--format', '{unique_chunks}'))
 
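The added assertion backs up the one after it: unique_chunks == 0 alone would also pass if input/large_file were missing from test1 altogether. Together the two assertions state the invariant being tested; roughly (a sketch of the invariant, not the test code):

    size = 10 * 1024 * 1024  # nonzero: the file really exists in test1 (value illustrative)
    unique_chunks = 0        # no chunk is exclusive to test1
    assert size > 0 and unique_chunks == 0  # every chunk shared: full deduplication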