borg/src/borg/helpers/msgpack.py

237 lines
7.4 KiB
Python

"""
wrapping msgpack
================
We wrap msgpack here the way we need it - to avoid having lots of clutter in the calling code.
Packing
-------
- use_bin_type = True (used by borg since borg 2.0)
This is used to generate output according to new msgpack 2.0 spec.
This cleanly keeps bytes and str types apart.
- use_bin_type = False (used by borg < 1.3)
This creates output according to the older msgpack spec.
BAD: str and bytes were packed into same "raw" representation.
- unicode_errors = 'surrogateescape'
Guess backup applications are one of the rare cases when this needs to be used.
It is needed because borg also needs to deal with data that does not cleanly encode/decode using utf-8.
There's a lot of crap out there, e.g. in filenames and as a backup tool, we must keep them as good as possible.
Unpacking
---------
- raw = False (used by borg since borg 2.0)
We already can use this with borg 2.0 due to the type conversion to the desired type in item.py update_internal
methods. This type conversion code can be removed in future, when we do not have to deal with data any more
that was packed the old way.
It will then unpack according to the msgpack 2.0 spec format and directly output bytes or str.
- raw = True (the old way, used by borg < 1.3)
- unicode_errors = 'surrogateescape' -> see description above (will be used when raw is False).
As of borg 2.0, we have fixed most of the msgpack str/bytes mess, #968.
Borg now still needs to **read** old repos, archives, keys, ... so we can not yet fix it completely.
But from now on, borg only **writes** new data according to the new msgpack 2.0 spec,
thus we can remove some legacy support in a later borg release (some places are marked with "legacy").
current way in msgpack terms
----------------------------
- pack with use_bin_type=True (according to msgpack 2.0 spec)
- packs str -> raw and bytes -> bin
- unpack with raw=False (according to msgpack 2.0 spec, using unicode_errors='surrogateescape')
- unpacks bin to bytes and raw to str (thus we need to convert to desired type if we want bytes from "raw")
"""
from .datastruct import StableDict
from ..constants import * # NOQA
from msgpack import Packer as mp_Packer
from msgpack import packb as mp_packb
from msgpack import pack as mp_pack
from msgpack import Unpacker as mp_Unpacker
from msgpack import unpackb as mp_unpackb
from msgpack import unpack as mp_unpack
from msgpack import version as mp_version
from msgpack import ExtType, Timestamp
from msgpack import OutOfData
version = mp_version
USE_BIN_TYPE = True
RAW = False
UNICODE_ERRORS = "surrogateescape"
class PackException(Exception):
"""Exception while msgpack packing"""
class UnpackException(Exception):
"""Exception while msgpack unpacking"""
class Packer(mp_Packer):
def __init__(
self,
*,
default=None,
unicode_errors=UNICODE_ERRORS,
use_single_float=False,
autoreset=True,
use_bin_type=USE_BIN_TYPE,
strict_types=False,
):
assert unicode_errors == UNICODE_ERRORS
super().__init__(
default=default,
unicode_errors=unicode_errors,
use_single_float=use_single_float,
autoreset=autoreset,
use_bin_type=use_bin_type,
strict_types=strict_types,
)
def pack(self, obj):
try:
return super().pack(obj)
except Exception as e:
raise PackException(e)
def packb(o, *, use_bin_type=USE_BIN_TYPE, unicode_errors=UNICODE_ERRORS, **kwargs):
assert unicode_errors == UNICODE_ERRORS
try:
return mp_packb(o, use_bin_type=use_bin_type, unicode_errors=unicode_errors, **kwargs)
except Exception as e:
raise PackException(e)
def pack(o, stream, *, use_bin_type=USE_BIN_TYPE, unicode_errors=UNICODE_ERRORS, **kwargs):
assert unicode_errors == UNICODE_ERRORS
try:
return mp_pack(o, stream, use_bin_type=use_bin_type, unicode_errors=unicode_errors, **kwargs)
except Exception as e:
raise PackException(e)
class Unpacker(mp_Unpacker):
def __init__(
self,
file_like=None,
*,
read_size=0,
use_list=True,
raw=RAW,
object_hook=None,
object_pairs_hook=None,
list_hook=None,
unicode_errors=UNICODE_ERRORS,
max_buffer_size=0,
ext_hook=ExtType,
strict_map_key=False,
):
assert raw == RAW
assert unicode_errors == UNICODE_ERRORS
kw = dict(
file_like=file_like,
read_size=read_size,
use_list=use_list,
raw=raw,
object_hook=object_hook,
object_pairs_hook=object_pairs_hook,
list_hook=list_hook,
unicode_errors=unicode_errors,
max_buffer_size=max_buffer_size,
ext_hook=ext_hook,
strict_map_key=strict_map_key,
)
super().__init__(**kw)
def unpack(self):
try:
return super().unpack()
except OutOfData:
raise
except Exception as e:
raise UnpackException(e)
def __next__(self):
try:
return super().__next__()
except StopIteration:
raise
except Exception as e:
raise UnpackException(e)
next = __next__
def unpackb(packed, *, raw=RAW, unicode_errors=UNICODE_ERRORS, strict_map_key=False, **kwargs):
assert raw == RAW
assert unicode_errors == UNICODE_ERRORS
try:
kw = dict(raw=raw, unicode_errors=unicode_errors, strict_map_key=strict_map_key)
kw.update(kwargs)
return mp_unpackb(packed, **kw)
except Exception as e:
raise UnpackException(e)
def unpack(stream, *, raw=RAW, unicode_errors=UNICODE_ERRORS, strict_map_key=False, **kwargs):
assert raw == RAW
assert unicode_errors == UNICODE_ERRORS
try:
kw = dict(raw=raw, unicode_errors=unicode_errors, strict_map_key=strict_map_key)
kw.update(kwargs)
return mp_unpack(stream, **kw)
except Exception as e:
raise UnpackException(e)
# msgpacking related utilities -----------------------------------------------
def is_slow_msgpack():
import msgpack
import msgpack.fallback
return msgpack.Packer is msgpack.fallback.Packer
def is_supported_msgpack():
# DO NOT CHANGE OR REMOVE! See also requirements and comments in pyproject.toml.
import msgpack
if msgpack.version in []: # < add bad releases here to deny list
return False
return (1, 0, 3) <= msgpack.version <= (1, 0, 8)
def get_limited_unpacker(kind):
"""return a limited Unpacker because we should not trust msgpack data received from remote"""
# Note: msgpack >= 0.6.1 auto-computes DoS-safe max values from len(data) for
# unpack(data) or from max_buffer_size for Unpacker(max_buffer_size=N).
args = dict(use_list=False, max_buffer_size=3 * max(BUFSIZE, MAX_OBJECT_SIZE)) # return tuples, not lists
if kind in ("server", "client"):
pass # nothing special
elif kind in ("manifest", "archive", "key"):
args.update(dict(use_list=True, object_hook=StableDict)) # default value
else:
raise ValueError('kind must be "server", "client", "manifest", "archive" or "key"')
return Unpacker(**args)
def int_to_timestamp(ns):
assert isinstance(ns, int)
return Timestamp.from_unix_nano(ns)
def timestamp_to_int(ts):
assert isinstance(ts, Timestamp)
return ts.to_unix_nano()