mirror of https://github.com/borgbackup/borg.git
recreate: improve tests, docs, check chunk availability[1]
[1] If e.g. delete/prune/check are run between recreate invocations, they can delete the recreate_partial_chunks. This case is now detected and handled correctly.
This commit is contained in:
parent
4b5e6ffa77
commit
0bc0fa22b4
|
@ -20,8 +20,9 @@ from .compress import Compressor, COMPR_BUFFER
|
|||
from .helpers import Error, uid2user, user2uid, gid2group, group2gid, \
|
||||
parse_timestamp, to_localtime, format_time, format_timedelta, \
|
||||
Manifest, Statistics, decode_dict, make_path_safe, StableDict, int_to_bigint, bigint_to_int, \
|
||||
ProgressIndicatorPercent, ChunkIteratorFileWrapper, remove_surrogates, log_multi, DASHES, PatternMatcher, \
|
||||
ProgressIndicatorPercent, ChunkIteratorFileWrapper, remove_surrogates, log_multi, DASHES, \
|
||||
PathPrefixPattern, FnmatchPattern, open_item, file_status, format_file_size, consume
|
||||
from .repository import Repository
|
||||
from .platform import acl_get, acl_set
|
||||
from .chunker import Chunker
|
||||
from .hashindex import ChunkIndex
|
||||
|
@ -1221,7 +1222,19 @@ class ArchiveRecreater:
|
|||
if self.progress:
|
||||
old_target.stats.show_progress(final=True)
|
||||
target.recreate_partial_chunks = old_target.metadata.get(b'recreate_partial_chunks', [])
|
||||
for chunk_id, _, _ in target.recreate_partial_chunks:
|
||||
for chunk_id, size, csize in target.recreate_partial_chunks:
|
||||
if not self.cache.seen_chunk(chunk_id):
|
||||
try:
|
||||
# Repository has __contains__, RemoteRepository doesn't
|
||||
self.repository.get(chunk_id)
|
||||
except Repository.ObjectNotFound:
|
||||
# delete/prune/check between invocations: these chunks are gone.
|
||||
target.recreate_partial_chunks = None
|
||||
break
|
||||
# fast-lane insert into chunks cache
|
||||
self.cache.chunks[chunk_id] = (1, size, csize)
|
||||
target.stats.update(size, csize, True)
|
||||
continue
|
||||
# incref now, otherwise old_target.delete() might delete these chunks
|
||||
self.cache.chunk_incref(chunk_id, target.stats)
|
||||
old_target.delete(Statistics(), progress=self.progress)
|
||||
|
|
|
@ -816,9 +816,9 @@ class Archiver:
|
|||
"""Re-create archives"""
|
||||
def interrupt(signal_num, stack_frame):
|
||||
if recreater.interrupt:
|
||||
print("Received signal, again. I'm not deaf.\n", file=sys.stderr)
|
||||
print("\nReceived signal, again. I'm not deaf.", file=sys.stderr)
|
||||
else:
|
||||
print("Received signal, will exit cleanly.\n", file=sys.stderr)
|
||||
print("\nReceived signal, will exit cleanly.", file=sys.stderr)
|
||||
recreater.interrupt = True
|
||||
|
||||
matcher, include_patterns = self.build_matcher(args.excludes, args.paths)
|
||||
|
@ -1664,12 +1664,12 @@ class Archiver:
|
|||
Recreate the contents of existing archives.
|
||||
|
||||
--exclude, --exclude-from and PATH have the exact same semantics
|
||||
as in "borg create". If a PATH is specified the resulting archive
|
||||
will only contain files under PATH.
|
||||
as in "borg create". If PATHs are specified the resulting archive
|
||||
will only contain files from these PATHs.
|
||||
|
||||
--compression: all chunks seen will be stored using the given method.
|
||||
Due to how Borg stores compressed size information this might display
|
||||
incorrect information for archives that were not rewritten at the same time.
|
||||
incorrect information for archives that were not recreated at the same time.
|
||||
There is no risk of data loss by this.
|
||||
|
||||
--chunker-params will re-chunk all files in the archive, this can be
|
||||
|
@ -1686,7 +1686,8 @@ class Archiver:
|
|||
Changing chunker params between invocations might lead to data loss.
|
||||
|
||||
USE WITH CAUTION.
|
||||
Permanent data loss by specifying incorrect patterns or PATHS is possible.
|
||||
Depending on the PATHs and patterns given, recreate can be used to permanently
|
||||
delete files from archives.
|
||||
When in doubt, use "--dry-run --verbose --list" to see how patterns/PATHS are
|
||||
interpreted.
|
||||
|
||||
|
@ -1695,7 +1696,7 @@ class Archiver:
|
|||
"<ARCHIVE>.recreate". The new archive will have a different archive ID.
|
||||
|
||||
When rechunking space usage can be substantial, expect at least the entire
|
||||
deduplicated size of the archives using the older chunker params.
|
||||
deduplicated size of the archives using the previous chunker params.
|
||||
When recompressing approximately 1 % of the repository size or 512 MB
|
||||
(whichever is greater) of additional space is used.
|
||||
""")
|
||||
|
@ -1712,7 +1713,7 @@ class Archiver:
|
|||
help='only display items with the given status characters')
|
||||
subparser.add_argument('-p', '--progress', dest='progress',
|
||||
action='store_true', default=False,
|
||||
help='show progress display while rewriting archives')
|
||||
help='show progress display while recreating archives')
|
||||
subparser.add_argument('-n', '--dry-run', dest='dry_run',
|
||||
action='store_true', default=False,
|
||||
help='do not change anything')
|
||||
|
@ -1746,9 +1747,6 @@ class Archiver:
|
|||
metavar='yyyy-mm-ddThh:mm:ss',
|
||||
help='manually specify the archive creation date/time (UTC). '
|
||||
'alternatively, give a reference file/directory.')
|
||||
archive_group.add_argument('-c', '--checkpoint-interval', dest='checkpoint_interval',
|
||||
type=int, default=300, metavar='SECONDS',
|
||||
help='write checkpoint every SECONDS seconds (Default: 300)')
|
||||
archive_group.add_argument('-C', '--compression', dest='compression',
|
||||
type=CompressionSpec, default=None, metavar='COMPRESSION',
|
||||
help='select compression algorithm (and level):\n'
|
||||
|
|
|
@ -926,13 +926,6 @@ class ArchiverTestCase(ArchiverTestCaseBase):
|
|||
self.assert_in("U input/file1", output)
|
||||
self.assert_in("x input/file2", output)
|
||||
|
||||
def test_create_delete_inbetween(self):
|
||||
self.create_test_files()
|
||||
self.cmd('init', self.repository_location)
|
||||
self.cmd('create', self.repository_location + '::test1', 'input')
|
||||
self.cmd('delete', self.repository_location + '::test1')
|
||||
self.cmd('create', self.repository_location + '::test2', 'input')
|
||||
|
||||
def test_create_topical(self):
|
||||
now = time.time()
|
||||
self.create_regular_file('file1', size=1024 * 80)
|
||||
|
@ -1231,8 +1224,8 @@ class ArchiverTestCase(ArchiverTestCaseBase):
|
|||
|
||||
def test_recreate_rechunkify(self):
|
||||
with open(os.path.join(self.input_path, 'large_file'), 'wb') as fd:
|
||||
fd.write(b'a' * 250)
|
||||
fd.write(b'b' * 250)
|
||||
fd.write(b'a' * 280)
|
||||
fd.write(b'b' * 280)
|
||||
self.cmd('init', self.repository_location)
|
||||
self.cmd('create', '--chunker-params', '7,9,8,128', self.repository_location + '::test1', 'input')
|
||||
self.cmd('create', self.repository_location + '::test2', 'input', '--no-files-cache')
|
||||
|
@ -1249,16 +1242,17 @@ class ArchiverTestCase(ArchiverTestCaseBase):
|
|||
def test_recreate_recompress(self):
|
||||
self.create_regular_file('compressible', size=10000)
|
||||
self.cmd('init', self.repository_location)
|
||||
self.cmd('create', self.repository_location + '::test', 'input')
|
||||
list = self.cmd('list', self.repository_location + '::test', 'input/compressible',
|
||||
'--format', '{size} {csize}')
|
||||
size, csize = map(int, list.split(' '))
|
||||
assert csize >= size
|
||||
self.cmd('create', self.repository_location + '::test', 'input', '-C', 'none')
|
||||
file_list = self.cmd('list', self.repository_location + '::test', 'input/compressible',
|
||||
'--format', '{size} {csize} {sha256}')
|
||||
size, csize, sha256_before = file_list.split(' ')
|
||||
assert int(csize) >= int(size) # >= due to metadata overhead
|
||||
self.cmd('recreate', self.repository_location, '-C', 'lz4')
|
||||
list = self.cmd('list', self.repository_location + '::test', 'input/compressible',
|
||||
'--format', '{size} {csize}')
|
||||
size, csize = map(int, list.split(' '))
|
||||
assert csize < size
|
||||
file_list = self.cmd('list', self.repository_location + '::test', 'input/compressible',
|
||||
'--format', '{size} {csize} {sha256}')
|
||||
size, csize, sha256_after = file_list.split(' ')
|
||||
assert int(csize) < int(size)
|
||||
assert sha256_before == sha256_after
|
||||
|
||||
def test_recreate_dry_run(self):
|
||||
self.create_regular_file('compressible', size=10000)
|
||||
|
@ -1327,7 +1321,8 @@ class ArchiverTestCase(ArchiverTestCaseBase):
|
|||
frame = inspect.stack()[2]
|
||||
try:
|
||||
caller_self = frame[0].f_locals['self']
|
||||
caller_self.interrupt = True
|
||||
if isinstance(caller_self, ArchiveRecreater):
|
||||
caller_self.interrupt = True
|
||||
finally:
|
||||
del frame
|
||||
return real_add_chunk(*args, **kwargs)
|
||||
|
@ -1339,9 +1334,9 @@ class ArchiverTestCase(ArchiverTestCaseBase):
|
|||
self.cmd('create', self.repository_location + '::test', 'input')
|
||||
archive_before = self.cmd('list', self.repository_location + '::test', '--format', '{sha512}')
|
||||
with patch.object(Cache, 'add_chunk', self._test_recreate_chunker_interrupt_patch()):
|
||||
self.cmd('recreate', '-p', '--chunker-params', '16,18,17,4095', self.repository_location)
|
||||
self.cmd('recreate', '-pv', '--chunker-params', '10,12,11,4095', self.repository_location)
|
||||
assert 'test.recreate' in self.cmd('list', self.repository_location)
|
||||
output = self.cmd('recreate', '-svp', '--debug', '--chunker-params', '16,18,17,4095', self.repository_location)
|
||||
output = self.cmd('recreate', '-svp', '--debug', '--chunker-params', '10,12,11,4095', self.repository_location)
|
||||
assert 'Found test.recreate, will resume' in output
|
||||
assert 'Copied 1 chunks from a partially processed item' in output
|
||||
archive_after = self.cmd('list', self.repository_location + '::test', '--format', '{sha512}')
|
||||
|
|
|
@ -18,7 +18,7 @@ optional arguments
|
|||
``--filter STATUSCHARS``
|
||||
| only display items with the given status characters
|
||||
``-p``, ``--progress``
|
||||
| show progress display while rewriting archives
|
||||
| show progress display while recreating archives
|
||||
``-n``, ``--dry-run``
|
||||
| do not change anything
|
||||
``-s``, ``--stats``
|
||||
|
@ -44,8 +44,6 @@ Archive options
|
|||
| add a comment text to the archive
|
||||
``--timestamp yyyy-mm-ddThh:mm:ss``
|
||||
| manually specify the archive creation date/time (UTC). alternatively, give a reference file/directory.
|
||||
``-c SECONDS``, ``--checkpoint-interval SECONDS``
|
||||
| write checkpoint every SECONDS seconds (Default: 300)
|
||||
``-C COMPRESSION``, ``--compression COMPRESSION``
|
||||
| select compression algorithm (and level):
|
||||
| none == no compression (default),
|
||||
|
@ -63,12 +61,12 @@ Description
|
|||
Recreate the contents of existing archives.
|
||||
|
||||
--exclude, --exclude-from and PATH have the exact same semantics
|
||||
as in "borg create". If a PATH is specified the resulting archive
|
||||
will only contain files under PATH.
|
||||
as in "borg create". If PATHs are specified the resulting archive
|
||||
will only contain files from these PATHs.
|
||||
|
||||
--compression: all chunks seen will be stored using the given method.
|
||||
Due to how Borg stores compressed size information this might display
|
||||
incorrect information for archives that were not rewritten at the same time.
|
||||
incorrect information for archives that were not recreated at the same time.
|
||||
There is no risk of data loss by this.
|
||||
|
||||
--chunker-params will re-chunk all files in the archive, this can be
|
||||
|
@ -85,7 +83,8 @@ processed files/dirs). Changing compression leads to incorrect size information
|
|||
Changing chunker params between invocations might lead to data loss.
|
||||
|
||||
USE WITH CAUTION.
|
||||
Permanent data loss by specifying incorrect patterns or PATHS is possible.
|
||||
Depending on the PATHs and patterns given, recreate can be used to permanently
|
||||
delete files from archives.
|
||||
When in doubt, use "--dry-run --verbose --list" to see how patterns/PATHS are
|
||||
interpreted.
|
||||
|
||||
|
@ -94,6 +93,6 @@ archive that is built during the operation exists at the same time at
|
|||
"<ARCHIVE>.recreate". The new archive will have a different archive ID.
|
||||
|
||||
When rechunking space usage can be substantial, expect at least the entire
|
||||
deduplicated size of the archives using the older chunker params.
|
||||
deduplicated size of the archives using the previous chunker params.
|
||||
When recompressing approximately 1 % of the repository size or 512 MB
|
||||
(whichever is greater) of additional space is used.
|
||||
|
|
Loading…
Reference in New Issue