Mirror of https://github.com/borgbackup/borg.git (synced 2025-02-24 23:13:25 +00:00)
cleanup msgpack related str/bytes mess, fixes #968
See the ticket and the borg.helpers.msgpack docstring. This changeset implements the full migration to the msgpack 2.0 spec (use_bin_type=True, raw=False). Compatibility with data packed the old way is still provided via the want_bytes decoder in borg.item.
This commit is contained in:
parent f8dbe5b542
commit 8e87f1111b

11 changed files with 124 additions and 129 deletions
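For context, the sketch below (not borg code; it uses the upstream msgpack-python package directly) illustrates the ambiguity this changeset removes: with the old packing settings, str and bytes collapse into the same msgpack "raw" type, so a reader can only get bytes back, while the msgpack 2.0 spec settings (use_bin_type=True on pack, raw=False on unpack) let both types survive a round trip unchanged.

import msgpack

item = {'path': '/etc/hostname', 'data': b'\x00\x01'}

# Old way (borg < 1.3): use_bin_type=False packs both str and bytes as "raw",
# so unpacking (raw=True) cannot tell them apart and returns bytes everywhere.
old = msgpack.unpackb(msgpack.packb(item, use_bin_type=False), raw=True)
assert old == {b'path': b'/etc/hostname', b'data': b'\x00\x01'}

# New way (this changeset): use_bin_type=True packs str as "raw" and bytes as "bin",
# and raw=False unpacking restores each value to its original Python type.
new = msgpack.unpackb(msgpack.packb(item, use_bin_type=True), raw=False)
assert new == item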
@@ -1718,13 +1718,10 @@ def rebuild_manifest(self):
         Iterates through all objects in the repository looking for archive metadata blocks.
         """
-        required_archive_keys = frozenset(key.encode() for key in REQUIRED_ARCHIVE_KEYS)
-
         def valid_archive(obj):
             if not isinstance(obj, dict):
                 return False
-            keys = set(obj)
-            return required_archive_keys.issubset(keys)
+            return REQUIRED_ARCHIVE_KEYS.issubset(obj)

         logger.info('Rebuilding missing manifest, this might take some time...')
         # as we have lost the manifest, we do not know any more what valid item keys we had.

@@ -1904,7 +1901,7 @@ def list_keys_safe(keys):
 def valid_item(obj):
     if not isinstance(obj, StableDict):
         return False, 'not a dictionary'
-    keys = set(k.decode('utf-8', errors='replace') for k in obj)
+    keys = set(obj)
     if not required_item_keys.issubset(keys):
         return False, 'missing required keys: ' + list_keys_safe(required_item_keys - keys)
     if not keys.issubset(item_keys):

@@ -2331,7 +2331,7 @@ def output(fd):
     unpacker = msgpack.Unpacker(use_list=False, object_hook=StableDict)
     first = True
-    for item_id in archive_org_dict[b'items']:
+    for item_id in archive_org_dict['items']:
         data = key.decrypt(item_id, repository.get(item_id))
         unpacker.feed(data)
         for item in unpacker:
@@ -232,24 +232,24 @@ def unpack_and_verify_manifest(self, data, force_tam_not_required=False):
         unpacker = get_limited_unpacker('manifest')
         unpacker.feed(data)
         unpacked = unpacker.unpack()
-        if b'tam' not in unpacked:
+        if 'tam' not in unpacked:
             if tam_required:
                 raise TAMRequiredError(self.repository._location.canonical_path())
             else:
                 logger.debug('TAM not found and not required')
                 return unpacked, False
-        tam = unpacked.pop(b'tam', None)
+        tam = unpacked.pop('tam', None)
         if not isinstance(tam, dict):
             raise TAMInvalid()
-        tam_type = tam.get(b'type', b'<none>').decode('ascii', 'replace')
+        tam_type = tam.get('type', '<none>')
         if tam_type != 'HKDF_HMAC_SHA512':
             if tam_required:
                 raise TAMUnsupportedSuiteError(repr(tam_type))
             else:
                 logger.debug('Ignoring TAM made with unsupported suite, since TAM is not required: %r', tam_type)
                 return unpacked, False
-        tam_hmac = tam.get(b'hmac')
-        tam_salt = tam.get(b'salt')
+        tam_hmac = tam.get('hmac')
+        tam_salt = tam.get('salt')
         if not isinstance(tam_salt, bytes) or not isinstance(tam_hmac, bytes):
             raise TAMInvalid()
         offset = data.index(tam_hmac)
@@ -2,8 +2,7 @@
 wrapping msgpack
 ================

-Due to the planned breaking api changes in upstream msgpack, we wrap it the way we need it -
-to avoid having lots of clutter in the calling code. see tickets #968 and #3632.
+We wrap msgpack here the way we need it - to avoid having lots of clutter in the calling code.

 Packing
 -------

@@ -22,30 +21,27 @@

 Unpacking
 ---------
-- raw = True (the old way, used by borg <= 1.3)
-  This is currently still needed to not try to decode "raw" msgpack objects.
-  These could come either from str (new or old msgpack) or bytes (old msgpack).
-  Thus, we basically must know what we want and either keep the bytes we get
-  or decode them to str, if we want str.
-
-- raw = False (the new way)
-  This can be used in future, when we do not have to deal with data any more that was packed the old way.
+- raw = False (used by borg since borg 1.3)
+  We already can use this with borg 1.3 due to the want_bytes decoder.
+  This decoder can be removed in future, when we do not have to deal with data any more that was packed the old way.
   It will then unpack according to the msgpack 2.0 spec format and directly output bytes or str.

+- raw = True (the old way, used by borg < 1.3)
+
 - unicode_errors = 'surrogateescape' -> see description above (will be used when raw is False).

-As of borg 1.3, we have the first part on the way to fix the msgpack str/bytes mess, #968.
-borg now still needs to **read** old repos, archives, keys, ... so we can not yet fix it completely.
-But from now on, borg only **writes** new data according to the new msgpack spec,
-thus we can complete the fix for #968 in a later borg release.
+As of borg 1.3, we have fixed most of the msgpack str/bytes mess, #968.
+Borg now still needs to **read** old repos, archives, keys, ... so we can not yet fix it completely.
+But from now on, borg only **writes** new data according to the new msgpack 2.0 spec,
+thus we can remove some legacy support in a later borg release (some places are marked with "legacy").

 current way in msgpack terms
 ----------------------------

 - pack with use_bin_type=True (according to msgpack 2.0 spec)
   - packs str -> raw and bytes -> bin
 - unpack with raw=True (aka "the old way")
   - unpacks raw to bytes (thus we always need to decode manually if we want str)
 - unpack with raw=False (according to msgpack 2.0 spec, using unicode_errors='surrogateescape')
   - unpacks bin to bytes and raw to str (thus we need to re-encode manually if we want bytes from "raw")
 """

 from .datastruct import StableDict
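The pack/unpack combinations listed in that docstring can be demonstrated with a minimal sketch using the upstream msgpack package directly (not the wrapper defined in this module):

import msgpack

packed = msgpack.packb({'name': 'text', 'id': b'\x00\xff'}, use_bin_type=True)

# unpack with raw=True ("the old way"): str and bytes both come back as bytes.
assert msgpack.unpackb(packed, raw=True) == {b'name': b'text', b'id': b'\x00\xff'}

# unpack with raw=False (msgpack 2.0 spec): bin -> bytes, raw -> str.
assert msgpack.unpackb(packed, raw=False) == {'name': 'text', 'id': b'\x00\xff'}

# unicode_errors='surrogateescape' lets non-UTF-8 "raw" data (e.g. file names packed
# the old way) survive a bytes -> str -> bytes round trip without losing information.
legacy = msgpack.packb(b'\xe9tude', use_bin_type=False)
name = msgpack.unpackb(legacy, raw=False, unicode_errors='surrogateescape')
assert name.encode('utf-8', 'surrogateescape') == b'\xe9tude'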
@@ -66,8 +62,8 @@
 version = mp_version

 USE_BIN_TYPE = True
-RAW = True  # should become False later when we do not need to read old stuff any more
-UNICODE_ERRORS = 'surrogateescape'  # previously done by safe_encode, safe_decode
+RAW = False
+UNICODE_ERRORS = 'surrogateescape'


 class PackException(Exception):

@@ -161,7 +157,7 @@ def unpackb(packed, *, raw=RAW, unicode_errors=UNICODE_ERRORS,
 def unpack(stream, *, raw=RAW, unicode_errors=UNICODE_ERRORS,
            strict_map_key=False,
            **kwargs):
-    # assert raw == RAW
+    assert raw == RAW
     assert unicode_errors == UNICODE_ERRORS
     try:
         kw = dict(raw=raw, unicode_errors=unicode_errors,
@@ -60,6 +60,15 @@ def fix_tuple_of_str_and_int(t):
     return t


+def want_bytes(v):
+    """we know that we want bytes and the value should be bytes"""
+    # legacy support: it being str can be caused by msgpack unpack decoding old data that was packed with use_bin_type=False
+    if isinstance(v, str):
+        v = v.encode('utf-8', errors='surrogateescape')
+    assert isinstance(v, bytes)
+    return v
+
+
 class PropDict:
     """
     Manage a dictionary via properties.
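The decode=want_bytes arguments added in the hunks below plug this helper into the property machinery. A minimal, hypothetical sketch (a simplified stand-in, not borg's real PropDict._make_property) of how such a decode hook normalizes legacy str values to bytes on assignment:

def want_bytes(v):  # as defined above; legacy str values get re-encoded to bytes
    return v.encode('utf-8', 'surrogateescape') if isinstance(v, str) else v

def make_property(key, value_type, decode=None):
    # hypothetical, simplified property factory for illustration only
    def _set(self, value):
        if decode is not None:
            value = decode(value)
        if not isinstance(value, value_type):
            raise TypeError(f'{key} must be {value_type.__name__}')
        self.__dict__[key] = value
    return property(lambda self: self.__dict__[key], _set)

class DemoItem:
    hlid = make_property('hlid', bytes, decode=want_bytes)

item = DemoItem()
item.hlid = 'abc'           # legacy str value, as produced by unpacking old data with raw=False
assert item.hlid == b'abc'  # normalized to bytes by the decode hook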
@@ -204,10 +213,10 @@ class Item(PropDict):
     user = PropDict._make_property('user', (str, type(None)), 'surrogate-escaped str or None')
     group = PropDict._make_property('group', (str, type(None)), 'surrogate-escaped str or None')

-    acl_access = PropDict._make_property('acl_access', bytes)
-    acl_default = PropDict._make_property('acl_default', bytes)
-    acl_extended = PropDict._make_property('acl_extended', bytes)
-    acl_nfs4 = PropDict._make_property('acl_nfs4', bytes)
+    acl_access = PropDict._make_property('acl_access', bytes, decode=want_bytes)
+    acl_default = PropDict._make_property('acl_default', bytes, decode=want_bytes)
+    acl_extended = PropDict._make_property('acl_extended', bytes, decode=want_bytes)
+    acl_nfs4 = PropDict._make_property('acl_nfs4', bytes, decode=want_bytes)

     mode = PropDict._make_property('mode', int)
     uid = PropDict._make_property('uid', int)

@@ -224,7 +233,7 @@ class Item(PropDict):
     # compatibility note: this is a new feature, in old archives size will be missing.
     size = PropDict._make_property('size', int)

-    hlid = PropDict._make_property('hlid', bytes)  # hard link id: same value means same hard link.
+    hlid = PropDict._make_property('hlid', bytes, decode=want_bytes)  # hard link id: same value means same hard link.
     hardlink_master = PropDict._make_property('hardlink_master', bool)  # legacy

     chunks = PropDict._make_property('chunks', (list, type(None)), 'list or None')

@@ -363,9 +372,9 @@ class EncryptedKey(PropDict):
     version = PropDict._make_property('version', int)
     algorithm = PropDict._make_property('algorithm', str)
     iterations = PropDict._make_property('iterations', int)
-    salt = PropDict._make_property('salt', bytes)
-    hash = PropDict._make_property('hash', bytes)
-    data = PropDict._make_property('data', bytes)
+    salt = PropDict._make_property('salt', bytes, decode=want_bytes)
+    hash = PropDict._make_property('hash', bytes, decode=want_bytes)
+    data = PropDict._make_property('data', bytes, decode=want_bytes)
     argon2_time_cost = PropDict._make_property('argon2_time_cost', int)
     argon2_memory_cost = PropDict._make_property('argon2_memory_cost', int)
     argon2_parallelism = PropDict._make_property('argon2_parallelism', int)

@@ -399,10 +408,10 @@ class Key(PropDict):
     __slots__ = ("_dict", )  # avoid setting attributes not supported by properties

     version = PropDict._make_property('version', int)
-    repository_id = PropDict._make_property('repository_id', bytes)
-    enc_key = PropDict._make_property('enc_key', bytes)
-    enc_hmac_key = PropDict._make_property('enc_hmac_key', bytes)
-    id_key = PropDict._make_property('id_key', bytes)
+    repository_id = PropDict._make_property('repository_id', bytes, decode=want_bytes)
+    enc_key = PropDict._make_property('enc_key', bytes, decode=want_bytes)
+    enc_hmac_key = PropDict._make_property('enc_hmac_key', bytes, decode=want_bytes)
+    id_key = PropDict._make_property('id_key', bytes, decode=want_bytes)
     chunk_seed = PropDict._make_property('chunk_seed', int)
     tam_required = PropDict._make_property('tam_required', bool)

@@ -443,7 +452,7 @@ class ArchiveItem(PropDict):
     chunker_params = PropDict._make_property('chunker_params', tuple)
     recreate_cmdline = PropDict._make_property('recreate_cmdline', list)  # list of s-e-str
     # recreate_source_id, recreate_args, recreate_partial_chunks were used in 1.1.0b1 .. b2
-    recreate_source_id = PropDict._make_property('recreate_source_id', bytes)
+    recreate_source_id = PropDict._make_property('recreate_source_id', bytes, decode=want_bytes)
     recreate_args = PropDict._make_property('recreate_args', list)  # list of s-e-str
     recreate_partial_chunks = PropDict._make_property('recreate_partial_chunks', list)  # list of tuples
     size = PropDict._make_property('size', int)
@@ -38,8 +38,7 @@
 RPC_PROTOCOL_VERSION = 2
 BORG_VERSION = parse_version(__version__)
-MSGID, MSG, ARGS, RESULT = 'i', 'm', 'a', 'r'  # pack
-MSGIDB, MSGB, ARGSB, RESULTB = b'i', b'm', b'a', b'r'  # unpack
+MSGID, MSG, ARGS, RESULT = 'i', 'm', 'a', 'r'

 MAX_INFLIGHT = 100


@@ -139,10 +138,6 @@ def __init__(self, data):
         }


-def decode_keys(d):
-    return {k.decode(): d[k] for k in d}
-
-
 class RepositoryServer:  # pragma: no cover
     rpc_methods = (
         '__len__',

@@ -217,14 +212,13 @@ def serve(self):
             for unpacked in unpacker:
                 if isinstance(unpacked, dict):
                     dictFormat = True
-                    msgid = unpacked[MSGIDB]
-                    method = unpacked[MSGB].decode()
-                    args = decode_keys(unpacked[ARGSB])
+                    msgid = unpacked[MSGID]
+                    method = unpacked[MSG]
+                    args = unpacked[ARGS]
                 elif isinstance(unpacked, tuple) and len(unpacked) == 4:
                     dictFormat = False
                     # The first field 'type' was always 1 and has always been ignored
                     _, msgid, method, args = unpacked
-                    method = method.decode()
                     args = self.positional_to_named(method, args)
                 else:
                     if self.repository is not None:

@@ -308,7 +302,7 @@ def negotiate(self, client_data):
         # clients since 1.1.0b3 use a dict as client_data
         # clients since 1.1.0b6 support json log format from server
         if isinstance(client_data, dict):
-            self.client_version = client_data[b'client_version']
+            self.client_version = client_data['client_version']
             level = logging.getLevelName(logging.getLogger('').level)
             setup_logging(is_serve=True, json=True, level=level)
             logger.debug('Initialized logging system for JSON-based protocol')

@@ -370,7 +364,6 @@ def open(self, path, create=False, lock_wait=None, lock=True, exclusive=None, ap
         return self.repository.id

     def inject_exception(self, kind):
-        kind = kind.decode()
         s1 = 'test string'
         s2 = 'test string2'
         if kind == 'DoesNotExist':
@@ -484,35 +477,35 @@ class RemoteRepository:

     class RPCError(Exception):
         def __init__(self, unpacked):
-            # for borg < 1.1: unpacked only has b'exception_class' as key
-            # for borg 1.1+: unpacked has keys: b'exception_args', b'exception_full', b'exception_short', b'sysinfo'
+            # for borg < 1.1: unpacked only has 'exception_class' as key
+            # for borg 1.1+: unpacked has keys: 'exception_args', 'exception_full', 'exception_short', 'sysinfo'
             self.unpacked = unpacked

         def get_message(self):
-            if b'exception_short' in self.unpacked:
-                return b'\n'.join(self.unpacked[b'exception_short']).decode()
+            if 'exception_short' in self.unpacked:
+                return '\n'.join(self.unpacked['exception_short'])
             else:
                 return self.exception_class

         @property
         def traceback(self):
-            return self.unpacked.get(b'exception_trace', True)
+            return self.unpacked.get('exception_trace', True)

         @property
         def exception_class(self):
-            return self.unpacked[b'exception_class'].decode()
+            return self.unpacked['exception_class']

         @property
         def exception_full(self):
-            if b'exception_full' in self.unpacked:
-                return b'\n'.join(self.unpacked[b'exception_full']).decode()
+            if 'exception_full' in self.unpacked:
+                return '\n'.join(self.unpacked['exception_full'])
             else:
                 return self.get_message() + '\nRemote Exception (see remote log for the traceback)'

         @property
         def sysinfo(self):
-            if b'sysinfo' in self.unpacked:
-                return self.unpacked[b'sysinfo'].decode()
+            if 'sysinfo' in self.unpacked:
+                return self.unpacked['sysinfo']
             else:
                 return ''
@@ -577,9 +570,9 @@ def __init__(self, location, create=False, exclusive=False, lock_wait=None, lock
             raise ConnectionClosedWithHint('Is borg working on the server?') from None
         if version == RPC_PROTOCOL_VERSION:
             self.dictFormat = False
-        elif isinstance(version, dict) and b'server_version' in version:
+        elif isinstance(version, dict) and 'server_version' in version:
             self.dictFormat = True
-            self.server_version = version[b'server_version']
+            self.server_version = version['server_version']
         else:
             raise Exception('Server insisted on using unsupported protocol version %s' % version)

@@ -734,9 +727,9 @@ def pop_preload_msgid(chunkid):
             return msgid

         def handle_error(unpacked):
-            error = unpacked[b'exception_class'].decode()
-            old_server = b'exception_args' not in unpacked
-            args = unpacked.get(b'exception_args')
+            error = unpacked['exception_class']
+            old_server = 'exception_args' not in unpacked
+            args = unpacked.get('exception_args')

             if error == 'DoesNotExist':
                 raise Repository.DoesNotExist(self.location.processed)
@@ -748,29 +741,29 @@ def handle_error(unpacked):
                 if old_server:
                     raise IntegrityError('(not available)')
                 else:
-                    raise IntegrityError(args[0].decode())
+                    raise IntegrityError(args[0])
             elif error == 'AtticRepository':
                 if old_server:
                     raise Repository.AtticRepository('(not available)')
                 else:
-                    raise Repository.AtticRepository(args[0].decode())
+                    raise Repository.AtticRepository(args[0])
             elif error == 'PathNotAllowed':
                 if old_server:
                     raise PathNotAllowed('(unknown)')
                 else:
-                    raise PathNotAllowed(args[0].decode())
+                    raise PathNotAllowed(args[0])
             elif error == 'ParentPathDoesNotExist':
-                raise Repository.ParentPathDoesNotExist(args[0].decode())
+                raise Repository.ParentPathDoesNotExist(args[0])
             elif error == 'ObjectNotFound':
                 if old_server:
                     raise Repository.ObjectNotFound('(not available)', self.location.processed)
                 else:
-                    raise Repository.ObjectNotFound(args[0].decode(), self.location.processed)
+                    raise Repository.ObjectNotFound(args[0], self.location.processed)
             elif error == 'InvalidRPCMethod':
                 if old_server:
                     raise InvalidRPCMethod('(not available)')
                 else:
-                    raise InvalidRPCMethod(args[0].decode())
+                    raise InvalidRPCMethod(args[0])
             else:
                 raise self.RPCError(unpacked)
@@ -789,10 +782,10 @@ def handle_error(unpacked):
                 try:
                     unpacked = self.responses.pop(waiting_for[0])
                     waiting_for.pop(0)
-                    if b'exception_class' in unpacked:
+                    if 'exception_class' in unpacked:
                         handle_error(unpacked)
                     else:
-                        yield unpacked[RESULTB]
+                        yield unpacked[RESULT]
                         if not waiting_for and not calls:
                             return
                 except KeyError:

@@ -809,10 +802,10 @@ def handle_error(unpacked):
                     else:
                         return
                 else:
-                    if b'exception_class' in unpacked:
+                    if 'exception_class' in unpacked:
                         handle_error(unpacked)
                     else:
-                        yield unpacked[RESULTB]
+                        yield unpacked[RESULT]
             if self.to_send or ((calls or self.preload_ids) and len(waiting_for) < MAX_INFLIGHT):
                 w_fds = [self.stdin_fd]
             else:

@@ -829,26 +822,26 @@ def handle_error(unpacked):
                 self.unpacker.feed(data)
                 for unpacked in self.unpacker:
                     if isinstance(unpacked, dict):
-                        msgid = unpacked[MSGIDB]
+                        msgid = unpacked[MSGID]
                     elif isinstance(unpacked, tuple) and len(unpacked) == 4:
                         # The first field 'type' was always 1 and has always been ignored
                         _, msgid, error, res = unpacked
                         if error:
                             # ignore res, because it is only a fixed string anyway.
-                            unpacked = {MSGIDB: msgid, b'exception_class': error}
+                            unpacked = {MSGID: msgid, 'exception_class': error}
                         else:
-                            unpacked = {MSGIDB: msgid, RESULTB: res}
+                            unpacked = {MSGID: msgid, RESULT: res}
                     else:
                         raise UnexpectedRPCDataFormatFromServer(data)
                     if msgid in self.ignore_responses:
                         self.ignore_responses.remove(msgid)
                         # async methods never return values, but may raise exceptions.
-                        if b'exception_class' in unpacked:
+                        if 'exception_class' in unpacked:
                             self.async_responses[msgid] = unpacked
                         else:
                             # we currently do not have async result values except "None",
                             # so we do not add them into async_responses.
-                            if unpacked[RESULTB] is not None:
+                            if unpacked[RESULT] is not None:
                                 self.async_responses[msgid] = unpacked
                     else:
                         self.responses[msgid] = unpacked
@@ -516,16 +516,16 @@ def _read_integrity(self, transaction_id, key):
                 integrity = msgpack.unpack(fd)
         except FileNotFoundError:
             return
-        if integrity.get(b'version') != 2:
-            logger.warning('Unknown integrity data version %r in %s', integrity.get(b'version'), integrity_file)
+        if integrity.get('version') != 2:
+            logger.warning('Unknown integrity data version %r in %s', integrity.get('version'), integrity_file)
             return
-        return integrity[key].decode()
+        return integrity[key]

     def open_index(self, transaction_id, auto_recover=True):
         if transaction_id is None:
             return NSIndex()
         index_path = os.path.join(self.path, 'index.%d' % transaction_id)
-        integrity_data = self._read_integrity(transaction_id, b'index')
+        integrity_data = self._read_integrity(transaction_id, 'index')
         try:
             with IntegrityCheckedFile(index_path, write=False, integrity_data=integrity_data) as fd:
                 return NSIndex.read(fd)

@@ -575,7 +575,7 @@ def prepare_txn(self, transaction_id, do_cleanup=True):
                 self.io.cleanup(transaction_id)
             hints_path = os.path.join(self.path, 'hints.%d' % transaction_id)
             index_path = os.path.join(self.path, 'index.%d' % transaction_id)
-            integrity_data = self._read_integrity(transaction_id, b'hints')
+            integrity_data = self._read_integrity(transaction_id, 'hints')
             try:
                 with IntegrityCheckedFile(hints_path, write=False, integrity_data=integrity_data) as fd:
                     hints = msgpack.unpack(fd)
@@ -588,23 +588,23 @@ def prepare_txn(self, transaction_id, do_cleanup=True):
                 self.check_transaction()
                 self.prepare_txn(transaction_id)
                 return
-            if hints[b'version'] == 1:
+            if hints['version'] == 1:
                 logger.debug('Upgrading from v1 hints.%d', transaction_id)
-                self.segments = hints[b'segments']
+                self.segments = hints['segments']
                 self.compact = FreeSpace()
                 self.storage_quota_use = 0
                 self.shadow_index = {}
-                for segment in sorted(hints[b'compact']):
+                for segment in sorted(hints['compact']):
                     logger.debug('Rebuilding sparse info for segment %d', segment)
                     self._rebuild_sparse(segment)
                 logger.debug('Upgrade to v2 hints complete')
-            elif hints[b'version'] != 2:
-                raise ValueError('Unknown hints file version: %d' % hints[b'version'])
+            elif hints['version'] != 2:
+                raise ValueError('Unknown hints file version: %d' % hints['version'])
             else:
-                self.segments = hints[b'segments']
-                self.compact = FreeSpace(hints[b'compact'])
-                self.storage_quota_use = hints.get(b'storage_quota_use', 0)
-                self.shadow_index = hints.get(b'shadow_index', {})
+                self.segments = hints['segments']
+                self.compact = FreeSpace(hints['compact'])
+                self.storage_quota_use = hints.get('storage_quota_use', 0)
+                self.shadow_index = hints.get('shadow_index', {})
             self.log_storage_quota()
             # Drop uncommitted segments in the shadow index
             for key, shadowed_segments in self.shadow_index.items():
@@ -621,16 +621,16 @@ def rename_tmp(file):
             os.rename(file + '.tmp', file)

         hints = {
-            b'version': 2,
-            b'segments': self.segments,
-            b'compact': self.compact,
-            b'storage_quota_use': self.storage_quota_use,
-            b'shadow_index': self.shadow_index,
+            'version': 2,
+            'segments': self.segments,
+            'compact': self.compact,
+            'storage_quota_use': self.storage_quota_use,
+            'shadow_index': self.shadow_index,
         }
         integrity = {
             # Integrity version started at 2, the current hints version.
             # Thus, integrity version == hints version, for now.
-            b'version': 2,
+            'version': 2,
         }
         transaction_id = self.io.get_segments_transaction_id()
         assert transaction_id is not None

@@ -647,7 +647,7 @@ def rename_tmp(file):
         with IntegrityCheckedFile(hints_file + '.tmp', filename=hints_name, write=True) as fd:
             msgpack.pack(hints, fd)
             flush_and_sync(fd)
-        integrity[b'hints'] = fd.integrity_data
+        integrity['hints'] = fd.integrity_data

         # Write repository index
         index_name = 'index.%d' % transaction_id

@@ -656,7 +656,7 @@ def rename_tmp(file):
             # XXX: Consider using SyncFile for index write-outs.
             self.index.write(fd)
             flush_and_sync(fd)
-        integrity[b'index'] = fd.integrity_data
+        integrity['index'] = fd.integrity_data

         # Write integrity file, containing checksums of the hints and index files
         integrity_name = 'integrity.%d' % transaction_id
@@ -171,7 +171,7 @@ def make_chunks(self, items):
         return b''.join(msgpack.packb({'path': item}) for item in items)

     def _validator(self, value):
-        return isinstance(value, dict) and value.get(b'path') in (b'foo', b'bar', b'boo', b'baz')
+        return isinstance(value, dict) and value.get('path') in ('foo', 'bar', 'boo', 'baz')

     def process(self, input):
         unpacker = RobustUnpacker(validator=self._validator, item_keys=ITEM_KEYS)

@@ -190,10 +190,10 @@ def test_extra_garbage_no_sync(self):
                   (False, [b'garbage'] + [self.make_chunks(['boo', 'baz'])])]
         result = self.process(chunks)
         self.assert_equal(result, [
-            {b'path': b'foo'}, {b'path': b'bar'},
+            {'path': 'foo'}, {'path': 'bar'},
             103, 97, 114, 98, 97, 103, 101,
-            {b'path': b'boo'},
-            {b'path': b'baz'}])
+            {'path': 'boo'},
+            {'path': 'baz'}])

     def split(self, left, length):
         parts = []

@@ -206,19 +206,19 @@ def test_correct_stream(self):
         chunks = self.split(self.make_chunks(['foo', 'bar', 'boo', 'baz']), 2)
         input = [(False, chunks)]
         result = self.process(input)
-        self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'bar'}, {b'path': b'boo'}, {b'path': b'baz'}])
+        self.assert_equal(result, [{'path': 'foo'}, {'path': 'bar'}, {'path': 'boo'}, {'path': 'baz'}])

     def test_missing_chunk(self):
         chunks = self.split(self.make_chunks(['foo', 'bar', 'boo', 'baz']), 4)
         input = [(False, chunks[:3]), (True, chunks[4:])]
         result = self.process(input)
-        self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'boo'}, {b'path': b'baz'}])
+        self.assert_equal(result, [{'path': 'foo'}, {'path': 'boo'}, {'path': 'baz'}])

     def test_corrupt_chunk(self):
         chunks = self.split(self.make_chunks(['foo', 'bar', 'boo', 'baz']), 4)
         input = [(False, chunks[:3]), (True, [b'gar', b'bage'] + chunks[3:])]
         result = self.process(input)
-        self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'boo'}, {b'path': b'baz'}])
+        self.assert_equal(result, [{'path': 'foo'}, {'path': 'boo'}, {'path': 'baz'}])


 @pytest.fixture
@@ -3623,14 +3623,14 @@ def test_init_defaults_to_argon2(self):
         self.cmd('init', '--encryption=repokey', self.repository_location)
         with Repository(self.repository_path) as repository:
             key = msgpack.unpackb(a2b_base64(repository.load_key()))
-        assert key[b'algorithm'] == b'argon2 chacha20-poly1305'
+        assert key['algorithm'] == 'argon2 chacha20-poly1305'

     def test_init_with_explicit_key_algorithm(self):
         """https://github.com/borgbackup/borg/issues/747#issuecomment-1076160401"""
         self.cmd('init', '--encryption=repokey', '--key-algorithm=pbkdf2', self.repository_location)
         with Repository(self.repository_path) as repository:
             key = msgpack.unpackb(a2b_base64(repository.load_key()))
-        assert key[b'algorithm'] == b'sha256'
+        assert key['algorithm'] == 'sha256'

     def verify_change_passphrase_does_not_change_algorithm(self, given_algorithm, expected_algorithm):
         self.cmd('init', '--encryption=repokey', '--key-algorithm', given_algorithm, self.repository_location)

@@ -3640,7 +3640,7 @@ def verify_change_passphrase_does_not_change_algorithm(self, given_algorithm, ex

         with Repository(self.repository_path) as repository:
             key = msgpack.unpackb(a2b_base64(repository.load_key()))
-        assert key[b'algorithm'] == expected_algorithm.encode()
+        assert key['algorithm'] == expected_algorithm

     def test_change_passphrase_does_not_change_algorithm_argon2(self):
         self.verify_change_passphrase_does_not_change_algorithm('argon2', 'argon2 chacha20-poly1305')

@@ -3655,7 +3655,7 @@ def verify_change_location_does_not_change_algorithm(self, given_algorithm, expe

         with Repository(self.repository_path) as repository:
             key = msgpack.unpackb(a2b_base64(repository.load_key()))
-        assert key[b'algorithm'] == expected_algorithm.encode()
+        assert key['algorithm'] == expected_algorithm

     def test_change_location_does_not_change_algorithm_argon2(self):
         self.verify_change_location_does_not_change_algorithm('argon2', 'argon2 chacha20-poly1305')

@@ -3969,7 +3969,7 @@ def test_not_required(self):
             key.change_passphrase(key._passphrase)

             manifest = msgpack.unpackb(key.decrypt(Manifest.MANIFEST_ID, repository.get(Manifest.MANIFEST_ID)))
-            del manifest[b'tam']
+            del manifest['tam']
             repository.put(Manifest.MANIFEST_ID, key.encrypt(Manifest.MANIFEST_ID, msgpack.packb(manifest)))
             repository.commit(compact=False)
         output = self.cmd('list', '--debug', self.repository_location)
@@ -360,23 +360,23 @@ def test_round_trip(self, key):
         assert blob.startswith(b'\x82')

         unpacked = msgpack.unpackb(blob)
-        assert unpacked[b'tam'][b'type'] == b'HKDF_HMAC_SHA512'
+        assert unpacked['tam']['type'] == 'HKDF_HMAC_SHA512'

         unpacked, verified = key.unpack_and_verify_manifest(blob)
         assert verified
-        assert unpacked[b'foo'] == b'bar'
-        assert b'tam' not in unpacked
+        assert unpacked['foo'] == 'bar'
+        assert 'tam' not in unpacked

-    @pytest.mark.parametrize('which', (b'hmac', b'salt'))
+    @pytest.mark.parametrize('which', ('hmac', 'salt'))
     def test_tampered(self, key, which):
         data = {'foo': 'bar'}
         blob = key.pack_and_authenticate_metadata(data)
         assert blob.startswith(b'\x82')

         unpacked = msgpack.unpackb(blob, object_hook=StableDict)
-        assert len(unpacked[b'tam'][which]) == 64
-        unpacked[b'tam'][which] = unpacked[b'tam'][which][0:32] + bytes(32)
-        assert len(unpacked[b'tam'][which]) == 64
+        assert len(unpacked['tam'][which]) == 64
+        unpacked['tam'][which] = unpacked['tam'][which][0:32] + bytes(32)
+        assert len(unpacked['tam'][which]) == 64
         blob = msgpack.packb(unpacked)

         with pytest.raises(TAMInvalid):

@@ -421,4 +421,4 @@ def to_dict(key):
         load_me = RepoKey.detect(repository, manifest_data=None)

         assert to_dict(load_me) == to_dict(save_me)
-        assert msgpack.unpackb(a2b_base64(saved))[b'algorithm'] == expected_algorithm.encode()
+        assert msgpack.unpackb(a2b_base64(saved))['algorithm'] == expected_algorithm
@@ -655,8 +655,8 @@ def _subtly_corrupted_hints_setup(self):
             hints = msgpack.unpack(fd)
             fd.seek(0)
             # Corrupt segment refcount
-            assert hints[b'segments'][2] == 1
-            hints[b'segments'][2] = 0
+            assert hints['segments'][2] == 1
+            hints['segments'][2] = 0
             msgpack.pack(hints, fd)
             fd.truncate()