diff --git a/borg/archive.py b/borg/archive.py index 7fc2ba330..3bf2d30f4 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -20,8 +20,9 @@ from .compress import Compressor, COMPR_BUFFER from .helpers import Error, uid2user, user2uid, gid2group, group2gid, \ parse_timestamp, to_localtime, format_time, format_timedelta, \ Manifest, Statistics, decode_dict, make_path_safe, StableDict, int_to_bigint, bigint_to_int, \ - ProgressIndicatorPercent, ChunkIteratorFileWrapper, remove_surrogates, log_multi, DASHES, PatternMatcher, \ + ProgressIndicatorPercent, ChunkIteratorFileWrapper, remove_surrogates, log_multi, DASHES, \ PathPrefixPattern, FnmatchPattern, open_item, file_status, format_file_size, consume +from .repository import Repository from .platform import acl_get, acl_set from .chunker import Chunker from .hashindex import ChunkIndex @@ -1221,7 +1222,19 @@ class ArchiveRecreater: if self.progress: old_target.stats.show_progress(final=True) target.recreate_partial_chunks = old_target.metadata.get(b'recreate_partial_chunks', []) - for chunk_id, _, _ in target.recreate_partial_chunks: + for chunk_id, size, csize in target.recreate_partial_chunks: + if not self.cache.seen_chunk(chunk_id): + try: + # Repository has __contains__, RemoteRepository doesn't + self.repository.get(chunk_id) + except Repository.ObjectNotFound: + # delete/prune/check between invocations: these chunks are gone. + target.recreate_partial_chunks = None + break + # fast-lane insert into chunks cache + self.cache.chunks[chunk_id] = (1, size, csize) + target.stats.update(size, csize, True) + continue # incref now, otherwise old_target.delete() might delete these chunks self.cache.chunk_incref(chunk_id, target.stats) old_target.delete(Statistics(), progress=self.progress) diff --git a/borg/archiver.py b/borg/archiver.py index 9e141c706..7185de5cc 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -816,9 +816,9 @@ class Archiver: """Re-create archives""" def interrupt(signal_num, stack_frame): if recreater.interrupt: - print("Received signal, again. I'm not deaf.\n", file=sys.stderr) + print("\nReceived signal, again. I'm not deaf.", file=sys.stderr) else: - print("Received signal, will exit cleanly.\n", file=sys.stderr) + print("\nReceived signal, will exit cleanly.", file=sys.stderr) recreater.interrupt = True matcher, include_patterns = self.build_matcher(args.excludes, args.paths) @@ -1664,12 +1664,12 @@ class Archiver: Recreate the contents of existing archives. --exclude, --exclude-from and PATH have the exact same semantics - as in "borg create". If a PATH is specified the resulting archive - will only contain files under PATH. + as in "borg create". If PATHs are specified the resulting archive + will only contain files from these PATHs. --compression: all chunks seen will be stored using the given method. Due to how Borg stores compressed size information this might display - incorrect information for archives that were not rewritten at the same time. + incorrect information for archives that were not recreated at the same time. There is no risk of data loss by this. --chunker-params will re-chunk all files in the archive, this can be @@ -1686,7 +1686,8 @@ class Archiver: Changing chunker params between invocations might lead to data loss. USE WITH CAUTION. - Permanent data loss by specifying incorrect patterns or PATHS is possible. + Depending on the PATHs and patterns given, recreate can be used to permanently + delete files from archives. When in doubt, use "--dry-run --verbose --list" to see how patterns/PATHS are interpreted. @@ -1695,7 +1696,7 @@ class Archiver: ".recreate". The new archive will have a different archive ID. When rechunking space usage can be substantial, expect at least the entire - deduplicated size of the archives using the older chunker params. + deduplicated size of the archives using the previous chunker params. When recompressing approximately 1 % of the repository size or 512 MB (whichever is greater) of additional space is used. """) @@ -1712,7 +1713,7 @@ class Archiver: help='only display items with the given status characters') subparser.add_argument('-p', '--progress', dest='progress', action='store_true', default=False, - help='show progress display while rewriting archives') + help='show progress display while recreating archives') subparser.add_argument('-n', '--dry-run', dest='dry_run', action='store_true', default=False, help='do not change anything') @@ -1746,9 +1747,6 @@ class Archiver: metavar='yyyy-mm-ddThh:mm:ss', help='manually specify the archive creation date/time (UTC). ' 'alternatively, give a reference file/directory.') - archive_group.add_argument('-c', '--checkpoint-interval', dest='checkpoint_interval', - type=int, default=300, metavar='SECONDS', - help='write checkpoint every SECONDS seconds (Default: 300)') archive_group.add_argument('-C', '--compression', dest='compression', type=CompressionSpec, default=None, metavar='COMPRESSION', help='select compression algorithm (and level):\n' diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index 94dacbb6f..d97d34c37 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -926,13 +926,6 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.assert_in("U input/file1", output) self.assert_in("x input/file2", output) - def test_create_delete_inbetween(self): - self.create_test_files() - self.cmd('init', self.repository_location) - self.cmd('create', self.repository_location + '::test1', 'input') - self.cmd('delete', self.repository_location + '::test1') - self.cmd('create', self.repository_location + '::test2', 'input') - def test_create_topical(self): now = time.time() self.create_regular_file('file1', size=1024 * 80) @@ -1231,8 +1224,8 @@ class ArchiverTestCase(ArchiverTestCaseBase): def test_recreate_rechunkify(self): with open(os.path.join(self.input_path, 'large_file'), 'wb') as fd: - fd.write(b'a' * 250) - fd.write(b'b' * 250) + fd.write(b'a' * 280) + fd.write(b'b' * 280) self.cmd('init', self.repository_location) self.cmd('create', '--chunker-params', '7,9,8,128', self.repository_location + '::test1', 'input') self.cmd('create', self.repository_location + '::test2', 'input', '--no-files-cache') @@ -1249,16 +1242,17 @@ class ArchiverTestCase(ArchiverTestCaseBase): def test_recreate_recompress(self): self.create_regular_file('compressible', size=10000) self.cmd('init', self.repository_location) - self.cmd('create', self.repository_location + '::test', 'input') - list = self.cmd('list', self.repository_location + '::test', 'input/compressible', - '--format', '{size} {csize}') - size, csize = map(int, list.split(' ')) - assert csize >= size + self.cmd('create', self.repository_location + '::test', 'input', '-C', 'none') + file_list = self.cmd('list', self.repository_location + '::test', 'input/compressible', + '--format', '{size} {csize} {sha256}') + size, csize, sha256_before = file_list.split(' ') + assert int(csize) >= int(size) # >= due to metadata overhead self.cmd('recreate', self.repository_location, '-C', 'lz4') - list = self.cmd('list', self.repository_location + '::test', 'input/compressible', - '--format', '{size} {csize}') - size, csize = map(int, list.split(' ')) - assert csize < size + file_list = self.cmd('list', self.repository_location + '::test', 'input/compressible', + '--format', '{size} {csize} {sha256}') + size, csize, sha256_after = file_list.split(' ') + assert int(csize) < int(size) + assert sha256_before == sha256_after def test_recreate_dry_run(self): self.create_regular_file('compressible', size=10000) @@ -1327,7 +1321,8 @@ class ArchiverTestCase(ArchiverTestCaseBase): frame = inspect.stack()[2] try: caller_self = frame[0].f_locals['self'] - caller_self.interrupt = True + if isinstance(caller_self, ArchiveRecreater): + caller_self.interrupt = True finally: del frame return real_add_chunk(*args, **kwargs) @@ -1339,9 +1334,9 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.cmd('create', self.repository_location + '::test', 'input') archive_before = self.cmd('list', self.repository_location + '::test', '--format', '{sha512}') with patch.object(Cache, 'add_chunk', self._test_recreate_chunker_interrupt_patch()): - self.cmd('recreate', '-p', '--chunker-params', '16,18,17,4095', self.repository_location) + self.cmd('recreate', '-pv', '--chunker-params', '10,12,11,4095', self.repository_location) assert 'test.recreate' in self.cmd('list', self.repository_location) - output = self.cmd('recreate', '-svp', '--debug', '--chunker-params', '16,18,17,4095', self.repository_location) + output = self.cmd('recreate', '-svp', '--debug', '--chunker-params', '10,12,11,4095', self.repository_location) assert 'Found test.recreate, will resume' in output assert 'Copied 1 chunks from a partially processed item' in output archive_after = self.cmd('list', self.repository_location + '::test', '--format', '{sha512}') diff --git a/docs/usage/recreate.rst.inc b/docs/usage/recreate.rst.inc index 8df27c7b5..2eb3b4fcc 100644 --- a/docs/usage/recreate.rst.inc +++ b/docs/usage/recreate.rst.inc @@ -18,7 +18,7 @@ optional arguments ``--filter STATUSCHARS`` | only display items with the given status characters ``-p``, ``--progress`` - | show progress display while rewriting archives + | show progress display while recreating archives ``-n``, ``--dry-run`` | do not change anything ``-s``, ``--stats`` @@ -44,8 +44,6 @@ Archive options | add a comment text to the archive ``--timestamp yyyy-mm-ddThh:mm:ss`` | manually specify the archive creation date/time (UTC). alternatively, give a reference file/directory. - ``-c SECONDS``, ``--checkpoint-interval SECONDS`` - | write checkpoint every SECONDS seconds (Default: 300) ``-C COMPRESSION``, ``--compression COMPRESSION`` | select compression algorithm (and level): | none == no compression (default), @@ -63,12 +61,12 @@ Description Recreate the contents of existing archives. --exclude, --exclude-from and PATH have the exact same semantics -as in "borg create". If a PATH is specified the resulting archive -will only contain files under PATH. +as in "borg create". If PATHs are specified the resulting archive +will only contain files from these PATHs. --compression: all chunks seen will be stored using the given method. Due to how Borg stores compressed size information this might display -incorrect information for archives that were not rewritten at the same time. +incorrect information for archives that were not recreated at the same time. There is no risk of data loss by this. --chunker-params will re-chunk all files in the archive, this can be @@ -85,7 +83,8 @@ processed files/dirs). Changing compression leads to incorrect size information Changing chunker params between invocations might lead to data loss. USE WITH CAUTION. -Permanent data loss by specifying incorrect patterns or PATHS is possible. +Depending on the PATHs and patterns given, recreate can be used to permanently +delete files from archives. When in doubt, use "--dry-run --verbose --list" to see how patterns/PATHS are interpreted. @@ -94,6 +93,6 @@ archive that is built during the operation exists at the same time at ".recreate". The new archive will have a different archive ID. When rechunking space usage can be substantial, expect at least the entire -deduplicated size of the archives using the older chunker params. +deduplicated size of the archives using the previous chunker params. When recompressing approximately 1 % of the repository size or 512 MB (whichever is greater) of additional space is used.