recreate: improve tests, docs, check chunk availability[1]

[1] If e.g. delete/prune/check are run between recreate invocations, they can
    delete the recreate_partial_chunks. This is now detected and handled correctly.
This commit is contained in:
Marian Beermann 2016-04-10 15:59:10 +02:00
parent 4b5e6ffa77
commit 0bc0fa22b4
No known key found for this signature in database
GPG Key ID: 9B8450B91D1362C1
4 changed files with 47 additions and 42 deletions

View File

@ -20,8 +20,9 @@ from .compress import Compressor, COMPR_BUFFER
from .helpers import Error, uid2user, user2uid, gid2group, group2gid, \
parse_timestamp, to_localtime, format_time, format_timedelta, \
Manifest, Statistics, decode_dict, make_path_safe, StableDict, int_to_bigint, bigint_to_int, \
ProgressIndicatorPercent, ChunkIteratorFileWrapper, remove_surrogates, log_multi, DASHES, PatternMatcher, \
ProgressIndicatorPercent, ChunkIteratorFileWrapper, remove_surrogates, log_multi, DASHES, \
PathPrefixPattern, FnmatchPattern, open_item, file_status, format_file_size, consume
from .repository import Repository
from .platform import acl_get, acl_set
from .chunker import Chunker
from .hashindex import ChunkIndex
@ -1221,7 +1222,19 @@ class ArchiveRecreater:
if self.progress:
old_target.stats.show_progress(final=True)
target.recreate_partial_chunks = old_target.metadata.get(b'recreate_partial_chunks', [])
for chunk_id, _, _ in target.recreate_partial_chunks:
for chunk_id, size, csize in target.recreate_partial_chunks:
if not self.cache.seen_chunk(chunk_id):
try:
# Repository has __contains__, RemoteRepository doesn't
self.repository.get(chunk_id)
except Repository.ObjectNotFound:
# delete/prune/check between invocations: these chunks are gone.
target.recreate_partial_chunks = None
break
# fast-lane insert into chunks cache
self.cache.chunks[chunk_id] = (1, size, csize)
target.stats.update(size, csize, True)
continue
# incref now, otherwise old_target.delete() might delete these chunks
self.cache.chunk_incref(chunk_id, target.stats)
old_target.delete(Statistics(), progress=self.progress)

View File

@ -816,9 +816,9 @@ class Archiver:
"""Re-create archives"""
def interrupt(signal_num, stack_frame):
if recreater.interrupt:
print("Received signal, again. I'm not deaf.\n", file=sys.stderr)
print("\nReceived signal, again. I'm not deaf.", file=sys.stderr)
else:
print("Received signal, will exit cleanly.\n", file=sys.stderr)
print("\nReceived signal, will exit cleanly.", file=sys.stderr)
recreater.interrupt = True
matcher, include_patterns = self.build_matcher(args.excludes, args.paths)
@ -1664,12 +1664,12 @@ class Archiver:
Recreate the contents of existing archives.
--exclude, --exclude-from and PATH have the exact same semantics
as in "borg create". If a PATH is specified the resulting archive
will only contain files under PATH.
as in "borg create". If PATHs are specified the resulting archive
will only contain files from these PATHs.
--compression: all chunks seen will be stored using the given method.
Due to how Borg stores compressed size information this might display
incorrect information for archives that were not rewritten at the same time.
incorrect information for archives that were not recreated at the same time.
There is no risk of data loss by this.
--chunker-params will re-chunk all files in the archive, this can be
@ -1686,7 +1686,8 @@ class Archiver:
Changing chunker params between invocations might lead to data loss.
USE WITH CAUTION.
Permanent data loss by specifying incorrect patterns or PATHS is possible.
Depending on the PATHs and patterns given, recreate can be used to permanently
delete files from archives.
When in doubt, use "--dry-run --verbose --list" to see how patterns/PATHS are
interpreted.
@ -1695,7 +1696,7 @@ class Archiver:
"<ARCHIVE>.recreate". The new archive will have a different archive ID.
When rechunking space usage can be substantial, expect at least the entire
deduplicated size of the archives using the older chunker params.
deduplicated size of the archives using the previous chunker params.
When recompressing approximately 1 % of the repository size or 512 MB
(whichever is greater) of additional space is used.
""")
@ -1712,7 +1713,7 @@ class Archiver:
help='only display items with the given status characters')
subparser.add_argument('-p', '--progress', dest='progress',
action='store_true', default=False,
help='show progress display while rewriting archives')
help='show progress display while recreating archives')
subparser.add_argument('-n', '--dry-run', dest='dry_run',
action='store_true', default=False,
help='do not change anything')
@ -1746,9 +1747,6 @@ class Archiver:
metavar='yyyy-mm-ddThh:mm:ss',
help='manually specify the archive creation date/time (UTC). '
'alternatively, give a reference file/directory.')
archive_group.add_argument('-c', '--checkpoint-interval', dest='checkpoint_interval',
type=int, default=300, metavar='SECONDS',
help='write checkpoint every SECONDS seconds (Default: 300)')
archive_group.add_argument('-C', '--compression', dest='compression',
type=CompressionSpec, default=None, metavar='COMPRESSION',
help='select compression algorithm (and level):\n'

View File

@ -926,13 +926,6 @@ class ArchiverTestCase(ArchiverTestCaseBase):
self.assert_in("U input/file1", output)
self.assert_in("x input/file2", output)
def test_create_delete_inbetween(self):
self.create_test_files()
self.cmd('init', self.repository_location)
self.cmd('create', self.repository_location + '::test1', 'input')
self.cmd('delete', self.repository_location + '::test1')
self.cmd('create', self.repository_location + '::test2', 'input')
def test_create_topical(self):
now = time.time()
self.create_regular_file('file1', size=1024 * 80)
@ -1231,8 +1224,8 @@ class ArchiverTestCase(ArchiverTestCaseBase):
def test_recreate_rechunkify(self):
with open(os.path.join(self.input_path, 'large_file'), 'wb') as fd:
fd.write(b'a' * 250)
fd.write(b'b' * 250)
fd.write(b'a' * 280)
fd.write(b'b' * 280)
self.cmd('init', self.repository_location)
self.cmd('create', '--chunker-params', '7,9,8,128', self.repository_location + '::test1', 'input')
self.cmd('create', self.repository_location + '::test2', 'input', '--no-files-cache')
@ -1249,16 +1242,17 @@ class ArchiverTestCase(ArchiverTestCaseBase):
def test_recreate_recompress(self):
self.create_regular_file('compressible', size=10000)
self.cmd('init', self.repository_location)
self.cmd('create', self.repository_location + '::test', 'input')
list = self.cmd('list', self.repository_location + '::test', 'input/compressible',
'--format', '{size} {csize}')
size, csize = map(int, list.split(' '))
assert csize >= size
self.cmd('create', self.repository_location + '::test', 'input', '-C', 'none')
file_list = self.cmd('list', self.repository_location + '::test', 'input/compressible',
'--format', '{size} {csize} {sha256}')
size, csize, sha256_before = file_list.split(' ')
assert int(csize) >= int(size) # >= due to metadata overhead
self.cmd('recreate', self.repository_location, '-C', 'lz4')
list = self.cmd('list', self.repository_location + '::test', 'input/compressible',
'--format', '{size} {csize}')
size, csize = map(int, list.split(' '))
assert csize < size
file_list = self.cmd('list', self.repository_location + '::test', 'input/compressible',
'--format', '{size} {csize} {sha256}')
size, csize, sha256_after = file_list.split(' ')
assert int(csize) < int(size)
assert sha256_before == sha256_after
def test_recreate_dry_run(self):
self.create_regular_file('compressible', size=10000)
@ -1327,7 +1321,8 @@ class ArchiverTestCase(ArchiverTestCaseBase):
frame = inspect.stack()[2]
try:
caller_self = frame[0].f_locals['self']
caller_self.interrupt = True
if isinstance(caller_self, ArchiveRecreater):
caller_self.interrupt = True
finally:
del frame
return real_add_chunk(*args, **kwargs)
@ -1339,9 +1334,9 @@ class ArchiverTestCase(ArchiverTestCaseBase):
self.cmd('create', self.repository_location + '::test', 'input')
archive_before = self.cmd('list', self.repository_location + '::test', '--format', '{sha512}')
with patch.object(Cache, 'add_chunk', self._test_recreate_chunker_interrupt_patch()):
self.cmd('recreate', '-p', '--chunker-params', '16,18,17,4095', self.repository_location)
self.cmd('recreate', '-pv', '--chunker-params', '10,12,11,4095', self.repository_location)
assert 'test.recreate' in self.cmd('list', self.repository_location)
output = self.cmd('recreate', '-svp', '--debug', '--chunker-params', '16,18,17,4095', self.repository_location)
output = self.cmd('recreate', '-svp', '--debug', '--chunker-params', '10,12,11,4095', self.repository_location)
assert 'Found test.recreate, will resume' in output
assert 'Copied 1 chunks from a partially processed item' in output
archive_after = self.cmd('list', self.repository_location + '::test', '--format', '{sha512}')

View File

@ -18,7 +18,7 @@ optional arguments
``--filter STATUSCHARS``
| only display items with the given status characters
``-p``, ``--progress``
| show progress display while rewriting archives
| show progress display while recreating archives
``-n``, ``--dry-run``
| do not change anything
``-s``, ``--stats``
@ -44,8 +44,6 @@ Archive options
| add a comment text to the archive
``--timestamp yyyy-mm-ddThh:mm:ss``
| manually specify the archive creation date/time (UTC). alternatively, give a reference file/directory.
``-c SECONDS``, ``--checkpoint-interval SECONDS``
| write checkpoint every SECONDS seconds (Default: 300)
``-C COMPRESSION``, ``--compression COMPRESSION``
| select compression algorithm (and level):
| none == no compression (default),
@ -63,12 +61,12 @@ Description
Recreate the contents of existing archives.
--exclude, --exclude-from and PATH have the exact same semantics
as in "borg create". If a PATH is specified the resulting archive
will only contain files under PATH.
as in "borg create". If PATHs are specified the resulting archive
will only contain files from these PATHs.
--compression: all chunks seen will be stored using the given method.
Due to how Borg stores compressed size information this might display
incorrect information for archives that were not rewritten at the same time.
incorrect information for archives that were not recreated at the same time.
There is no risk of data loss by this.
--chunker-params will re-chunk all files in the archive, this can be
@ -85,7 +83,8 @@ processed files/dirs). Changing compression leads to incorrect size information
Changing chunker params between invocations might lead to data loss.
USE WITH CAUTION.
Permanent data loss by specifying incorrect patterns or PATHS is possible.
Depending on the PATHs and patterns given, recreate can be used to permanently
delete files from archives.
When in doubt, use "--dry-run --verbose --list" to see how patterns/PATHS are
interpreted.
@ -94,6 +93,6 @@ archive that is built during the operation exists at the same time at
"<ARCHIVE>.recreate". The new archive will have a different archive ID.
When rechunking space usage can be substantial, expect at least the entire
deduplicated size of the archives using the older chunker params.
deduplicated size of the archives using the previous chunker params.
When recompressing approximately 1 % of the repository size or 512 MB
(whichever is greater) of additional space is used.