Mirror of https://github.com/borgbackup/borg.git

Merge pull request #1420 from enkore/f/recreate1.1rc1

recreate goals for 1.1rc1

Commit 6e9debb027
src/borg/archive.py

@@ -19,6 +19,7 @@ logger = create_logger()
 from . import xattr
 from .cache import ChunkListEntry
 from .chunker import Chunker
+from .compress import Compressor
 from .constants import *  # NOQA
 from .hashindex import ChunkIndex, ChunkIndexEntry
 from .helpers import Manifest
@@ -1298,7 +1299,7 @@ class ArchiveRecreater:

     def __init__(self, repository, manifest, key, cache, matcher,
                  exclude_caches=False, exclude_if_present=None, keep_tag_files=False,
-                 chunker_params=None, compression=None, compression_files=None,
+                 chunker_params=None, compression=None, compression_files=None, always_recompress=False,
                  dry_run=False, stats=False, progress=False, file_status_printer=None):
         self.repository = repository
         self.key = key
@@ -1312,6 +1313,7 @@ class ArchiveRecreater:

         self.chunker_params = chunker_params or CHUNKER_PARAMS
         self.recompress = bool(compression)
+        self.always_recompress = always_recompress
         self.compression = compression or CompressionSpec('none')
         self.seen_chunks = set()
         self.compression_decider1 = CompressionDecider1(compression or CompressionSpec('none'),
@@ -1329,10 +1331,10 @@ class ArchiveRecreater:
         self.interrupt = False
         self.errors = False

-    def recreate(self, archive_name, comment=None):
+    def recreate(self, archive_name, comment=None, target_name=None):
         assert not self.is_temporary_archive(archive_name)
         archive = self.open_archive(archive_name)
-        target, resume_from = self.create_target_or_resume(archive)
+        target, resume_from = self.create_target_or_resume(archive, target_name)
         if self.exclude_if_present or self.exclude_caches:
             self.matcher_add_tagged_dirs(archive)
         if self.matcher.empty() and not self.recompress and not target.recreate_rechunkify and comment is None:
@@ -1342,7 +1344,8 @@ class ArchiveRecreater:
             self.process_items(archive, target, resume_from)
         except self.Interrupted as e:
             return self.save(archive, target, completed=False, metadata=e.metadata)
-        return self.save(archive, target, comment)
+        replace_original = target_name is None
+        return self.save(archive, target, comment, replace_original=replace_original)

     def process_items(self, archive, target, resume_from=None):
         matcher = self.matcher
@@ -1404,7 +1407,6 @@ class ArchiveRecreater:

     def process_chunks(self, archive, target, item):
         """Return new chunk ID list for 'item'."""
-        # TODO: support --compression-from
         if not self.recompress and not target.recreate_rechunkify:
             for chunk_id, size, csize in item.chunks:
                 self.cache.chunk_incref(chunk_id, target.stats)
@@ -1412,13 +1414,22 @@ class ArchiveRecreater:
         new_chunks = self.process_partial_chunks(target)
         chunk_iterator = self.create_chunk_iterator(archive, target, item)
         consume(chunk_iterator, len(new_chunks))
+        compress = self.compression_decider1.decide(item.path)
         for chunk in chunk_iterator:
+            chunk.meta['compress'] = compress
             chunk_id = self.key.id_hash(chunk.data)
             if chunk_id in self.seen_chunks:
                 new_chunks.append(self.cache.chunk_incref(chunk_id, target.stats))
             else:
-                # TODO: detect / skip / --always-recompress
-                chunk_id, size, csize = self.cache.add_chunk(chunk_id, chunk, target.stats, overwrite=self.recompress)
+                compression_spec, chunk = self.key.compression_decider2.decide(chunk)
+                overwrite = self.recompress
+                if self.recompress and not self.always_recompress and chunk_id in self.cache.chunks:
+                    # Check if this chunk is already compressed the way we want it
+                    old_chunk = self.key.decrypt(None, self.repository.get(chunk_id), decompress=False)
+                    if Compressor.detect(old_chunk.data).name == compression_spec['name']:
+                        # Stored chunk has the same compression we wanted
+                        overwrite = False
+                chunk_id, size, csize = self.cache.add_chunk(chunk_id, chunk, target.stats, overwrite=overwrite)
             new_chunks.append((chunk_id, size, csize))
             self.seen_chunks.add(chunk_id)
             if self.recompress:
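
Note on the hunk above: with header-based detection, recreate can skip recompressing chunks whose stored algorithm already matches the requested one, unless --always-recompress forces a rewrite. A minimal, self-contained sketch of that decision (illustrative stand-ins, not borg's real classes; the magic values are placeholders)::

    MAGICS = {
        b'\x00\x00': 'none',
        b'\x01\x00': 'lz4',
        b'\x02\x00': 'lzma',
    }

    def detect(data):
        """Name of the compressor that presumably produced 'data'."""
        return MAGICS.get(bytes(data[:2]), 'unknown')

    def overwrite_needed(stored, wanted, always_recompress):
        # Rewrite the stored chunk only if forced, or if its current
        # algorithm differs from the requested one.
        return always_recompress or detect(stored) != wanted

    assert overwrite_needed(b'\x01\x00...', 'lzma', False)      # lz4 -> lzma: recompress
    assert not overwrite_needed(b'\x02\x00...', 'lzma', False)  # already lzma: skip
    assert overwrite_needed(b'\x02\x00...', 'lzma', True)       # forced by --always-recompress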
@@ -1465,7 +1476,7 @@ class ArchiveRecreater:
             logger.debug('Copied %d chunks from a partially processed item', len(partial_chunks))
         return partial_chunks

-    def save(self, archive, target, comment=None, completed=True, metadata=None):
+    def save(self, archive, target, comment=None, completed=True, metadata=None, replace_original=True):
         """Save target archive. If completed, replace source. If not, save temporary with additional 'metadata' dict."""
         if self.dry_run:
             return completed
@@ -1477,6 +1488,7 @@ class ArchiveRecreater:
             'cmdline': archive.metadata[b'cmdline'],
             'recreate_cmdline': sys.argv,
         })
-        archive.delete(Statistics(), progress=self.progress)
-        target.rename(archive.name)
+        if replace_original:
+            archive.delete(Statistics(), progress=self.progress)
+            target.rename(archive.name)
         if self.stats:
@@ -1530,11 +1542,11 @@ class ArchiveRecreater:
         matcher.add(tag_files, True)
         matcher.add(tagged_dirs, False)

-    def create_target_or_resume(self, archive):
+    def create_target_or_resume(self, archive, target_name=None):
         """Create new target archive or resume from temporary archive, if it exists. Return archive, resume from path"""
         if self.dry_run:
             return self.FakeTargetArchive(), None
-        target_name = archive.name + '.recreate'
+        target_name = target_name or archive.name + '.recreate'
         resume = target_name in self.manifest.archives
         target, resume_from = None, None
         if resume:
src/borg/archiver.py

@@ -957,6 +957,7 @@ class Archiver:
             exclude_caches=args.exclude_caches, exclude_if_present=args.exclude_if_present,
             keep_tag_files=args.keep_tag_files, chunker_params=args.chunker_params,
             compression=args.compression, compression_files=args.compression_files,
+            always_recompress=args.always_recompress,
             progress=args.progress, stats=args.stats,
             file_status_printer=self.print_file_status,
             dry_run=args.dry_run)
@@ -968,8 +969,11 @@ class Archiver:
             if recreater.is_temporary_archive(name):
                 self.print_error('Refusing to work on temporary archive of prior recreate: %s', name)
                 return self.exit_code
-            recreater.recreate(name, args.comment)
+            recreater.recreate(name, args.comment, args.target)
         else:
+            if args.target is not None:
+                self.print_error('--target: Need to specify single archive')
+                return self.exit_code
             for archive in manifest.list_archive_infos(sort_by='ts'):
                 name = archive.name
                 if recreater.is_temporary_archive(name):
@@ -2007,6 +2011,9 @@ class Archiver:
         as in "borg create". If PATHs are specified the resulting archive
         will only contain files from these PATHs.

+        Note that all paths in an archive are relative, therefore absolute patterns/paths
+        will *not* match (--exclude, --exclude-from, --compression-from, PATHs).
+
         --compression: all chunks seen will be stored using the given method.
         Due to how Borg stores compressed size information this might display
         incorrect information for archives that were not recreated at the same time.
@@ -2035,6 +2042,8 @@ class Archiver:
         archive that is built during the operation exists at the same time at
         "<ARCHIVE>.recreate". The new archive will have a different archive ID.

+        With --target the original archive is not replaced, instead a new archive is created.
+
         When rechunking space usage can be substantial, expect at least the entire
         deduplicated size of the archives using the previous chunker params.
         When recompressing approximately 1 % of the repository size or 512 MB
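
A hypothetical invocation of the new option (repository path and archive names invented for illustration), keeping the original archive and writing the re-filtered result under a new name::

    $ borg recreate /path/to/repo::my-archive input/dir2 --target=my-archive-slim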
@@ -2080,6 +2089,10 @@ class Archiver:
                                    help='keep tag files of excluded caches/directories')

         archive_group = subparser.add_argument_group('Archive options')
+        archive_group.add_argument('--target', dest='target', metavar='TARGET', default=None,
+                                   type=archivename_validator(),
+                                   help='create a new archive with the name ARCHIVE, do not replace existing archive '
+                                        '(only applies for a single archive)')
         archive_group.add_argument('--comment', dest='comment', metavar='COMMENT', default=None,
                                    help='add a comment text to the archive')
         archive_group.add_argument('--timestamp', dest='timestamp',
@@ -2098,6 +2111,9 @@ class Archiver:
                                         'zlib,0 .. zlib,9 == zlib (with level 0..9),\n'
                                         'lzma == lzma (default level 6),\n'
                                         'lzma,0 .. lzma,9 == lzma (with level 0..9).')
+        archive_group.add_argument('--always-recompress', dest='always_recompress', action='store_true',
+                                   help='always recompress chunks, don\'t skip chunks already compressed with the same '
+                                        'algorithm.')
         archive_group.add_argument('--compression-from', dest='compression_files',
                                    type=argparse.FileType('r'), action='append',
                                    metavar='COMPRESSIONCONFIG', help='read compression patterns from COMPRESSIONCONFIG, one per line')
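
Likewise, a hypothetical recompression run over all archives in a repository; without --always-recompress, chunks already stored as zlib would be skipped by the detection logic shown earlier::

    $ borg recreate /path/to/repo --compression zlib,9 --always-recompress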
src/borg/compress.pyx

@@ -6,6 +6,8 @@ except ImportError:

 from .helpers import Buffer

+API_VERSION = 2
+
 cdef extern from "lz4.h":
     int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) nogil
     int LZ4_decompress_safe(const char* source, char* dest, int inputSize, int maxOutputSize) nogil
@@ -194,9 +196,14 @@ class Compressor:
         return self.compressor.compress(data)

     def decompress(self, data):
+        compressor_cls = self.detect(data)
+        return compressor_cls(**self.params).decompress(data)
+
+    @staticmethod
+    def detect(data):
         hdr = bytes(data[:2])  # detect() does not work with memoryview
         for cls in COMPRESSOR_LIST:
             if cls.detect(hdr):
-                return cls(**self.params).decompress(data)
+                return cls
         else:
             raise ValueError('No decompressor for this data found: %r.', data[:2])
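
With detect() factored out as a staticmethod that returns the compressor class instead of decompressing, callers such as ArchiveRecreater.process_chunks can identify a chunk's compression by reading only its two-byte header. A hedged usage sketch (the Compressor constructor arguments are assumed, they are not shown in this diff)::

    from borg.compress import Compressor

    data = Compressor('lz4').compress(b'x' * 1000)       # assumed: name passed positionally
    cls = Compressor.detect(data)                        # cheap: inspects only the header
    assert cls.name == 'lz4'                             # .name as used by archive.py above
    assert Compressor('lz4').decompress(data) == b'x' * 1000  # detect() picks the right class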
src/borg/helpers.py

@@ -84,11 +84,13 @@ class PlaceholderError(Error):


 def check_extension_modules():
-    from . import platform
+    from . import platform, compress
     if hashindex.API_VERSION != 3:
         raise ExtensionModuleError
     if chunker.API_VERSION != 2:
         raise ExtensionModuleError
+    if compress.API_VERSION != 2:
+        raise ExtensionModuleError
     if crypto.API_VERSION != 3:
         raise ExtensionModuleError
     if platform.API_VERSION != 3:
src/borg/key.py

@@ -105,9 +105,15 @@ class KeyBase:
     def encrypt(self, chunk):
         pass

-    def decrypt(self, id, data):
+    def decrypt(self, id, data, decompress=True):
         pass

+    def assert_id(self, id, data):
+        if id:
+            id_computed = self.id_hash(data)
+            if not compare_digest(id_computed, id):
+                raise IntegrityError('Chunk id verification failed')
+

 class PlaintextKey(KeyBase):
     TYPE = 0x02
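
assert_id() centralizes the chunk-id check that each decrypt() previously inlined; note it runs on the decompressed plaintext, which is why decrypt(..., decompress=False) skips verification. A self-contained sketch of its semantics (plain SHA-256 stands in here for borg's keyed id_hash)::

    from hashlib import sha256
    from hmac import compare_digest

    class IntegrityError(Exception):
        pass

    def assert_id(id, data):
        # Verify that 'data' still hashes to the chunk id it was stored under.
        if id:
            id_computed = sha256(data).digest()  # borg actually uses a keyed MAC
            if not compare_digest(id_computed, id):
                raise IntegrityError('Chunk id verification failed')

    data = b'123456789'
    assert_id(sha256(data).digest(), data)             # ok, passes silently
    try:
        assert_id(sha256(data).digest(), data + b'1')  # tampered data
    except IntegrityError:
        pass                                           # rejected, as intended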
@@ -130,12 +136,14 @@ class PlaintextKey(KeyBase):
         chunk = self.compress(chunk)
         return b''.join([self.TYPE_STR, chunk.data])

-    def decrypt(self, id, data):
+    def decrypt(self, id, data, decompress=True):
         if data[0] != self.TYPE:
             raise IntegrityError('Invalid encryption envelope')
-        data = self.compressor.decompress(memoryview(data)[1:])
-        if id and sha256(data).digest() != id:
-            raise IntegrityError('Chunk id verification failed')
+        payload = memoryview(data)[1:]
+        if not decompress:
+            return Chunk(payload)
+        data = self.compressor.decompress(payload)
+        self.assert_id(id, data)
         return Chunk(data)

@@ -166,7 +174,7 @@ class AESKeyBase(KeyBase):
         hmac = hmac_sha256(self.enc_hmac_key, data)
         return b''.join((self.TYPE_STR, hmac, data))

-    def decrypt(self, id, data):
+    def decrypt(self, id, data, decompress=True):
         if not (data[0] == self.TYPE or
                 data[0] == PassphraseKey.TYPE and isinstance(self, RepoKey)):
             raise IntegrityError('Invalid encryption envelope')
@@ -176,12 +184,11 @@ class AESKeyBase(KeyBase):
         if not compare_digest(hmac_computed, hmac_given):
             raise IntegrityError('Encryption envelope checksum mismatch')
         self.dec_cipher.reset(iv=PREFIX + data[33:41])
-        data = self.compressor.decompress(self.dec_cipher.decrypt(data_view[41:]))
-        if id:
-            hmac_given = id
-            hmac_computed = hmac_sha256(self.id_key, data)
-            if not compare_digest(hmac_computed, hmac_given):
-                raise IntegrityError('Chunk id verification failed')
+        payload = self.dec_cipher.decrypt(data_view[41:])
+        if not decompress:
+            return Chunk(payload)
+        data = self.compressor.decompress(payload)
+        self.assert_id(id, data)
         return Chunk(data)

     def extract_nonce(self, payload):
src/borg/testsuite/archiver.py

@@ -1522,6 +1522,28 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.cmd('init', self.repository_location, exit_code=1)
         assert not os.path.exists(self.repository_location)

+    def test_recreate_target_rc(self):
+        self.cmd('init', self.repository_location)
+        output = self.cmd('recreate', self.repository_location, '--target=asdf', exit_code=2)
+        assert 'Need to specify single archive' in output
+
+    def test_recreate_target(self):
+        self.create_test_files()
+        self.cmd('init', self.repository_location)
+        archive = self.repository_location + '::test0'
+        self.cmd('create', archive, 'input')
+        original_archive = self.cmd('list', self.repository_location)
+        self.cmd('recreate', archive, 'input/dir2', '-e', 'input/dir2/file3', '--target=new-archive')
+        archives = self.cmd('list', self.repository_location)
+        assert original_archive in archives
+        assert 'new-archive' in archives
+
+        archive = self.repository_location + '::new-archive'
+        listing = self.cmd('list', '--short', archive)
+        assert 'file1' not in listing
+        assert 'dir2/file2' in listing
+        assert 'dir2/file3' not in listing
+
     def test_recreate_basic(self):
         self.create_test_files()
         self.create_regular_file('dir2/file3', size=1024 * 80)
src/borg/testsuite/key.py

@@ -43,6 +43,14 @@ class TestKey:
         monkeypatch.setenv('BORG_KEYS_DIR', tmpdir)
         return tmpdir

+    @pytest.fixture(params=(
+        KeyfileKey,
+        PlaintextKey
+    ))
+    def key(self, request, monkeypatch):
+        monkeypatch.setenv('BORG_PASSPHRASE', 'test')
+        return request.param.create(self.MockRepository(), self.MockArgs())
+
     class MockRepository:
         class _Location:
             orig = '/some/place'
@@ -155,6 +163,24 @@ class TestKey:
             id[12] = 0
             key.decrypt(id, data)

+    def test_decrypt_decompress(self, key):
+        plaintext = Chunk(b'123456789')
+        encrypted = key.encrypt(plaintext)
+        assert key.decrypt(None, encrypted, decompress=False) != plaintext
+        assert key.decrypt(None, encrypted) == plaintext
+
+    def test_assert_id(self, key):
+        plaintext = b'123456789'
+        id = key.id_hash(plaintext)
+        key.assert_id(id, plaintext)
+        id_changed = bytearray(id)
+        id_changed[0] += 1
+        with pytest.raises(IntegrityError):
+            key.assert_id(id_changed, plaintext)
+        plaintext_changed = plaintext + b'1'
+        with pytest.raises(IntegrityError):
+            key.assert_id(id, plaintext_changed)
+

 class TestPassphrase:
     def test_passphrase_new_verification(self, capsys, monkeypatch):