Merge pull request #1420 from enkore/f/recreate1.1rc1

recreate goals for 1.1rc1
This commit is contained in:
TW 2016-08-14 18:04:41 +02:00 committed by GitHub
commit 6e9debb027
7 changed files with 120 additions and 28 deletions

View File: archive.py

@@ -19,6 +19,7 @@ logger = create_logger()
 from . import xattr
 from .cache import ChunkListEntry
 from .chunker import Chunker
+from .compress import Compressor
 from .constants import *  # NOQA
 from .hashindex import ChunkIndex, ChunkIndexEntry
 from .helpers import Manifest
@@ -1298,7 +1299,7 @@ class ArchiveRecreater:
     def __init__(self, repository, manifest, key, cache, matcher,
                  exclude_caches=False, exclude_if_present=None, keep_tag_files=False,
-                 chunker_params=None, compression=None, compression_files=None,
+                 chunker_params=None, compression=None, compression_files=None, always_recompress=False,
                  dry_run=False, stats=False, progress=False, file_status_printer=None):
         self.repository = repository
         self.key = key
@@ -1312,6 +1313,7 @@ class ArchiveRecreater:
         self.chunker_params = chunker_params or CHUNKER_PARAMS
         self.recompress = bool(compression)
+        self.always_recompress = always_recompress
         self.compression = compression or CompressionSpec('none')
         self.seen_chunks = set()
         self.compression_decider1 = CompressionDecider1(compression or CompressionSpec('none'),
@@ -1329,10 +1331,10 @@ class ArchiveRecreater:
         self.interrupt = False
         self.errors = False

-    def recreate(self, archive_name, comment=None):
+    def recreate(self, archive_name, comment=None, target_name=None):
         assert not self.is_temporary_archive(archive_name)
         archive = self.open_archive(archive_name)
-        target, resume_from = self.create_target_or_resume(archive)
+        target, resume_from = self.create_target_or_resume(archive, target_name)
         if self.exclude_if_present or self.exclude_caches:
             self.matcher_add_tagged_dirs(archive)
         if self.matcher.empty() and not self.recompress and not target.recreate_rechunkify and comment is None:
@@ -1342,7 +1344,8 @@ class ArchiveRecreater:
             self.process_items(archive, target, resume_from)
         except self.Interrupted as e:
             return self.save(archive, target, completed=False, metadata=e.metadata)
-        return self.save(archive, target, comment)
+        replace_original = target_name is None
+        return self.save(archive, target, comment, replace_original=replace_original)

     def process_items(self, archive, target, resume_from=None):
         matcher = self.matcher
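
The two hunks above implement one compact rule: a user-supplied target name means the original archive is kept; no target name means the original is replaced via the temporary '<name>.recreate' archive. A standalone sketch of that decision (the function and names are illustrative, not borg's API):

    # Sketch: --target keeps the original; without it, recreate replaces in place.
    def plan_recreate(archive_name, target_name=None):
        # Returns (name of the archive to write, whether to replace the original).
        replace_original = target_name is None
        return target_name or archive_name + '.recreate', replace_original

    assert plan_recreate('docs') == ('docs.recreate', True)
    assert plan_recreate('docs', 'docs-slim') == ('docs-slim', False)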
@@ -1404,7 +1407,6 @@ class ArchiveRecreater:
     def process_chunks(self, archive, target, item):
         """Return new chunk ID list for 'item'."""
-        # TODO: support --compression-from
         if not self.recompress and not target.recreate_rechunkify:
             for chunk_id, size, csize in item.chunks:
                 self.cache.chunk_incref(chunk_id, target.stats)
@@ -1412,13 +1414,22 @@ class ArchiveRecreater:
         new_chunks = self.process_partial_chunks(target)
         chunk_iterator = self.create_chunk_iterator(archive, target, item)
         consume(chunk_iterator, len(new_chunks))
+        compress = self.compression_decider1.decide(item.path)
         for chunk in chunk_iterator:
+            chunk.meta['compress'] = compress
             chunk_id = self.key.id_hash(chunk.data)
             if chunk_id in self.seen_chunks:
                 new_chunks.append(self.cache.chunk_incref(chunk_id, target.stats))
             else:
-                # TODO: detect / skip / --always-recompress
-                chunk_id, size, csize = self.cache.add_chunk(chunk_id, chunk, target.stats, overwrite=self.recompress)
+                compression_spec, chunk = self.key.compression_decider2.decide(chunk)
+                overwrite = self.recompress
+                if self.recompress and not self.always_recompress and chunk_id in self.cache.chunks:
+                    # Check if this chunk is already compressed the way we want it
+                    old_chunk = self.key.decrypt(None, self.repository.get(chunk_id), decompress=False)
+                    if Compressor.detect(old_chunk.data).name == compression_spec['name']:
+                        # Stored chunk has the same compression we wanted
+                        overwrite = False
+                chunk_id, size, csize = self.cache.add_chunk(chunk_id, chunk, target.stats, overwrite=overwrite)
                 new_chunks.append((chunk_id, size, csize))
                 self.seen_chunks.add(chunk_id)
                 if self.recompress:
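
The skip logic above needs only the first two bytes of a stored chunk: each compressor writes a recognizable header, so recreate can fetch the chunk, decrypt it with decompress=False, and compare the detected algorithm with the requested one, avoiding a decompress/recompress round trip. A self-contained sketch of that decision; the two-byte magics below are placeholders, not borg's actual framing:

    # Hypothetical magics; borg's real values live in the compressor classes.
    MAGICS = {b'\x01\x00': 'lz4', b'\x02\x00': 'lzma', b'\x03\x00': 'zlib'}

    def detect_name(stored):
        # Name the compression of a stored chunk by its header bytes.
        return MAGICS.get(bytes(stored[:2]), 'none')

    def should_overwrite(stored, wanted, recompress, always):
        # Rewrite the chunk unless it is already compressed the way we want.
        if not recompress:
            return False
        if always:
            return True
        return detect_name(stored) != wanted

    assert should_overwrite(b'\x01\x00data', 'lz4', recompress=True, always=False) is False
    assert should_overwrite(b'\x01\x00data', 'zlib', recompress=True, always=False) is True
    assert should_overwrite(b'\x01\x00data', 'lz4', recompress=True, always=True) is True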
@@ -1465,7 +1476,7 @@ class ArchiveRecreater:
         logger.debug('Copied %d chunks from a partially processed item', len(partial_chunks))
         return partial_chunks

-    def save(self, archive, target, comment=None, completed=True, metadata=None):
+    def save(self, archive, target, comment=None, completed=True, metadata=None, replace_original=True):
         """Save target archive. If completed, replace source. If not, save temporary with additional 'metadata' dict."""
         if self.dry_run:
             return completed
@@ -1477,6 +1488,7 @@ class ArchiveRecreater:
             'cmdline': archive.metadata[b'cmdline'],
             'recreate_cmdline': sys.argv,
         })
+        if replace_original:
             archive.delete(Statistics(), progress=self.progress)
             target.rename(archive.name)
         if self.stats:
@@ -1530,11 +1542,11 @@ class ArchiveRecreater:
         matcher.add(tag_files, True)
         matcher.add(tagged_dirs, False)

-    def create_target_or_resume(self, archive):
+    def create_target_or_resume(self, archive, target_name=None):
         """Create new target archive or resume from temporary archive, if it exists. Return archive, resume from path"""
         if self.dry_run:
             return self.FakeTargetArchive(), None
-        target_name = archive.name + '.recreate'
+        target_name = target_name or archive.name + '.recreate'
         resume = target_name in self.manifest.archives
         target, resume_from = None, None
         if resume:

View File: archiver.py

@@ -957,6 +957,7 @@ class Archiver:
                                      exclude_caches=args.exclude_caches, exclude_if_present=args.exclude_if_present,
                                      keep_tag_files=args.keep_tag_files, chunker_params=args.chunker_params,
                                      compression=args.compression, compression_files=args.compression_files,
+                                     always_recompress=args.always_recompress,
                                      progress=args.progress, stats=args.stats,
                                      file_status_printer=self.print_file_status,
                                      dry_run=args.dry_run)
@@ -968,8 +969,11 @@ class Archiver:
             if recreater.is_temporary_archive(name):
                 self.print_error('Refusing to work on temporary archive of prior recreate: %s', name)
                 return self.exit_code
-            recreater.recreate(name, args.comment)
+            recreater.recreate(name, args.comment, args.target)
         else:
+            if args.target is not None:
+                self.print_error('--target: Need to specify single archive')
+                return self.exit_code
             for archive in manifest.list_archive_infos(sort_by='ts'):
                 name = archive.name
                 if recreater.is_temporary_archive(name):
@@ -2007,6 +2011,9 @@ class Archiver:
         as in "borg create". If PATHs are specified the resulting archive
         will only contain files from these PATHs.

+        Note that all paths in an archive are relative, therefore absolute patterns/paths
+        will *not* match (--exclude, --exclude-from, --compression-from, PATHs).
+
         --compression: all chunks seen will be stored using the given method.
         Due to how Borg stores compressed size information this might display
         incorrect information for archives that were not recreated at the same time.
@@ -2035,6 +2042,8 @@ class Archiver:
         archive that is built during the operation exists at the same time at
         "<ARCHIVE>.recreate". The new archive will have a different archive ID.

+        With --target the original archive is not replaced; instead, a new archive is created.
+
         When rechunking space usage can be substantial, expect at least the entire
         deduplicated size of the archives using the previous chunker params.
         When recompressing approximately 1 % of the repository size or 512 MB
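
For example, a hypothetical invocation (repository path and archive names are placeholders) that writes a slimmed-down copy under a new name and leaves the original archive untouched:

    borg recreate --target archive-slim -e 'input/dir2/file3' /path/to/repo::archive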
@@ -2080,6 +2089,10 @@ class Archiver:
                                help='keep tag files of excluded caches/directories')

         archive_group = subparser.add_argument_group('Archive options')
+        archive_group.add_argument('--target', dest='target', metavar='TARGET', default=None,
+                                   type=archivename_validator(),
+                                   help='create a new archive with the name TARGET, do not replace existing archive '
+                                        '(only applies for a single archive)')
         archive_group.add_argument('--comment', dest='comment', metavar='COMMENT', default=None,
                                    help='add a comment text to the archive')
         archive_group.add_argument('--timestamp', dest='timestamp',
@@ -2098,6 +2111,9 @@ class Archiver:
                                    'zlib,0 .. zlib,9 == zlib (with level 0..9),\n'
                                    'lzma == lzma (default level 6),\n'
                                    'lzma,0 .. lzma,9 == lzma (with level 0..9).')
+        archive_group.add_argument('--always-recompress', dest='always_recompress', action='store_true',
+                                   help='always recompress chunks, don\'t skip chunks already compressed with the same '
+                                        'algorithm.')
         archive_group.add_argument('--compression-from', dest='compression_files',
                                    type=argparse.FileType('r'), action='append',
                                    metavar='COMPRESSIONCONFIG', help='read compression patterns from COMPRESSIONCONFIG, one per line')
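
A hypothetical invocation combining both options (paths are placeholders), forcing every chunk to be rewritten with lz4 even when it is already lz4-compressed:

    borg recreate --compression lz4 --always-recompress /path/to/repo::archive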

View File: compress.pyx

@@ -6,6 +6,8 @@ except ImportError:

 from .helpers import Buffer

+API_VERSION = 2
+
 cdef extern from "lz4.h":
     int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) nogil
     int LZ4_decompress_safe(const char* source, char* dest, int inputSize, int maxOutputSize) nogil
@@ -194,9 +196,14 @@ class Compressor:
         return self.compressor.compress(data)

     def decompress(self, data):
+        compressor_cls = self.detect(data)
+        return compressor_cls(**self.params).decompress(data)
+
+    @staticmethod
+    def detect(data):
         hdr = bytes(data[:2])  # detect() does not work with memoryview
         for cls in COMPRESSOR_LIST:
             if cls.detect(hdr):
-                return cls(**self.params).decompress(data)
+                return cls
         else:
             raise ValueError('No decompressor for this data found: %r.', data[:2])
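
The refactor above splits header sniffing out of decompress() so that callers, such as the recreate skip logic in archive.py, can ask how a chunk is compressed without decompressing it. A runnable Python model of the dispatch, with stand-in compressor classes and made-up magic bytes (the real classes and magics live in this file):

    class CNONE:
        name = 'none'
        magic = b'\x00\x00'  # made-up marker for this sketch

        def __init__(self, **params):
            self.params = params

        @classmethod
        def matches(cls, hdr):
            return hdr == cls.magic

        def decompress(self, data):
            return bytes(data[2:])

    class FakeLZ4(CNONE):
        name = 'lz4'
        magic = b'\x01\x00'

    COMPRESSOR_LIST = [CNONE, FakeLZ4]

    class Compressor:
        def __init__(self, **params):
            self.params = params

        @staticmethod
        def detect(data):
            hdr = bytes(data[:2])  # detect() does not work with memoryview
            for cls in COMPRESSOR_LIST:
                if cls.matches(hdr):
                    return cls
            raise ValueError('No decompressor for this data found: %r.' % (data[:2],))

        def decompress(self, data):
            # Detect the class from the header, then instantiate and decompress.
            return self.detect(data)(**self.params).decompress(data)

    assert Compressor.detect(b'\x01\x00abc').name == 'lz4'
    assert Compressor().decompress(b'\x00\x00abc') == b'abc'

Returning the class (not an instance) is what lets archive.py compare Compressor.detect(old_chunk.data).name against the target compression spec without doing any decompression.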

View File: helpers.py

@@ -84,11 +84,13 @@ class PlaceholderError(Error):

 def check_extension_modules():
-    from . import platform
+    from . import platform, compress
     if hashindex.API_VERSION != 3:
         raise ExtensionModuleError
     if chunker.API_VERSION != 2:
         raise ExtensionModuleError
+    if compress.API_VERSION != 2:
+        raise ExtensionModuleError
     if crypto.API_VERSION != 3:
         raise ExtensionModuleError
     if platform.API_VERSION != 3:
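
compress now exports an API_VERSION like the other compiled modules, so a mismatch between the Python sources and the built extensions fails fast at startup. A compact sketch of the same gate (expected versions copied from the checks above; the mapping-based structure is illustrative):

    EXPECTED_API_VERSIONS = {
        'hashindex': 3,
        'chunker': 2,
        'compress': 2,
        'crypto': 3,
        'platform': 3,
    }

    class ExtensionModuleError(RuntimeError):
        # Raised when a compiled module does not match the Python sources.
        pass

    def check_extension_modules(modules):
        # 'modules' maps a module name to anything exposing API_VERSION.
        for name, expected in EXPECTED_API_VERSIONS.items():
            if getattr(modules[name], 'API_VERSION', None) != expected:
                raise ExtensionModuleError(name)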

View File: key.py

@@ -105,9 +105,15 @@ class KeyBase:
     def encrypt(self, chunk):
         pass

-    def decrypt(self, id, data):
+    def decrypt(self, id, data, decompress=True):
         pass

+    def assert_id(self, id, data):
+        if id:
+            id_computed = self.id_hash(data)
+            if not compare_digest(id_computed, id):
+                raise IntegrityError('Chunk id verification failed')
+

 class PlaintextKey(KeyBase):
     TYPE = 0x02
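
assert_id centralizes the chunk-id verification that was previously duplicated in each decrypt() implementation. The id is recomputed from the plaintext and compared with compare_digest, a constant-time comparison that does not leak how many leading bytes matched. A standalone equivalent, with plain SHA-256 standing in for id_hash (which is a keyed MAC for encrypted repositories):

    import hashlib
    from hmac import compare_digest

    class IntegrityError(Exception):
        pass

    def assert_id(id, data, id_hash=lambda d: hashlib.sha256(d).digest()):
        if id:
            id_computed = id_hash(data)
            if not compare_digest(id_computed, id):
                raise IntegrityError('Chunk id verification failed')

    data = b'123456789'
    assert_id(hashlib.sha256(data).digest(), data)  # passes silently
    assert_id(None, data)  # no id given: nothing to verify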
@@ -130,12 +136,14 @@ class PlaintextKey(KeyBase):
         chunk = self.compress(chunk)
         return b''.join([self.TYPE_STR, chunk.data])

-    def decrypt(self, id, data):
+    def decrypt(self, id, data, decompress=True):
         if data[0] != self.TYPE:
             raise IntegrityError('Invalid encryption envelope')
-        data = self.compressor.decompress(memoryview(data)[1:])
-        if id and sha256(data).digest() != id:
-            raise IntegrityError('Chunk id verification failed')
+        payload = memoryview(data)[1:]
+        if not decompress:
+            return Chunk(payload)
+        data = self.compressor.decompress(payload)
+        self.assert_id(id, data)
         return Chunk(data)
@@ -166,7 +174,7 @@ class AESKeyBase(KeyBase):
         hmac = hmac_sha256(self.enc_hmac_key, data)
         return b''.join((self.TYPE_STR, hmac, data))

-    def decrypt(self, id, data):
+    def decrypt(self, id, data, decompress=True):
         if not (data[0] == self.TYPE or
                 data[0] == PassphraseKey.TYPE and isinstance(self, RepoKey)):
             raise IntegrityError('Invalid encryption envelope')
@@ -176,12 +184,11 @@ class AESKeyBase(KeyBase):
         if not compare_digest(hmac_computed, hmac_given):
             raise IntegrityError('Encryption envelope checksum mismatch')
         self.dec_cipher.reset(iv=PREFIX + data[33:41])
-        data = self.compressor.decompress(self.dec_cipher.decrypt(data_view[41:]))
-        if id:
-            hmac_given = id
-            hmac_computed = hmac_sha256(self.id_key, data)
-            if not compare_digest(hmac_computed, hmac_given):
-                raise IntegrityError('Chunk id verification failed')
+        payload = self.dec_cipher.decrypt(data_view[41:])
+        if not decompress:
+            return Chunk(payload)
+        data = self.compressor.decompress(payload)
+        self.assert_id(id, data)
         return Chunk(data)

     def extract_nonce(self, payload):
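
Read together, the offsets above describe the encrypted envelope: one type byte, a 32-byte HMAC-SHA256 over the rest, 8 bytes of nonce/IV material at offsets 33..41, then the ciphertext; decompress=False stops right after decryption and returns the still-compressed payload. A small parser for that layout, offered as a reading of this diff rather than a public API:

    def split_envelope(data):
        # | type: 1 byte | hmac: 32 bytes | iv part: 8 bytes | ciphertext |
        view = memoryview(data)
        type_byte = view[0]
        mac = bytes(view[1:33])
        iv_part = bytes(view[33:41])
        ciphertext = view[41:]
        return type_byte, mac, iv_part, ciphertext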

View File: testsuite/archiver.py

@@ -1522,6 +1522,28 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.cmd('init', self.repository_location, exit_code=1)
         assert not os.path.exists(self.repository_location)

+    def test_recreate_target_rc(self):
+        self.cmd('init', self.repository_location)
+        output = self.cmd('recreate', self.repository_location, '--target=asdf', exit_code=2)
+        assert 'Need to specify single archive' in output
+
+    def test_recreate_target(self):
+        self.create_test_files()
+        self.cmd('init', self.repository_location)
+        archive = self.repository_location + '::test0'
+        self.cmd('create', archive, 'input')
+        original_archive = self.cmd('list', self.repository_location)
+        self.cmd('recreate', archive, 'input/dir2', '-e', 'input/dir2/file3', '--target=new-archive')
+        archives = self.cmd('list', self.repository_location)
+        assert original_archive in archives
+        assert 'new-archive' in archives
+
+        archive = self.repository_location + '::new-archive'
+        listing = self.cmd('list', '--short', archive)
+        assert 'file1' not in listing
+        assert 'dir2/file2' in listing
+        assert 'dir2/file3' not in listing
+
     def test_recreate_basic(self):
         self.create_test_files()
         self.create_regular_file('dir2/file3', size=1024 * 80)

View File: testsuite/key.py

@@ -43,6 +43,14 @@ class TestKey:
         monkeypatch.setenv('BORG_KEYS_DIR', tmpdir)
         return tmpdir

+    @pytest.fixture(params=(
+        KeyfileKey,
+        PlaintextKey
+    ))
+    def key(self, request, monkeypatch):
+        monkeypatch.setenv('BORG_PASSPHRASE', 'test')
+        return request.param.create(self.MockRepository(), self.MockArgs())
+
     class MockRepository:
         class _Location:
             orig = '/some/place'
@@ -155,6 +163,24 @@ class TestKey:
         id[12] = 0
         key.decrypt(id, data)

+    def test_decrypt_decompress(self, key):
+        plaintext = Chunk(b'123456789')
+        encrypted = key.encrypt(plaintext)
+        assert key.decrypt(None, encrypted, decompress=False) != plaintext
+        assert key.decrypt(None, encrypted) == plaintext
+
+    def test_assert_id(self, key):
+        plaintext = b'123456789'
+        id = key.id_hash(plaintext)
+        key.assert_id(id, plaintext)
+        id_changed = bytearray(id)
+        id_changed[0] += 1
+        with pytest.raises(IntegrityError):
+            key.assert_id(id_changed, plaintext)
+        plaintext_changed = plaintext + b'1'
+        with pytest.raises(IntegrityError):
+            key.assert_id(id, plaintext_changed)
+

 class TestPassphrase:
     def test_passphrase_new_verification(self, capsys, monkeypatch):
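
Assuming the usual pytest layout of the borg test suite (the selection expression and invocation are illustrative), the tests added in this PR can be run selectively with:

    python -m pytest -k 'recreate_target or decrypt_decompress or assert_id'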