mirror of https://github.com/borgbackup/borg.git
237 lines
7.4 KiB
Python
237 lines
7.4 KiB
Python
"""
|
|
wrapping msgpack
|
|
================
|
|
|
|
We wrap msgpack here the way we need it - to avoid having lots of clutter in the calling code.
|
|
|
|
Packing
|
|
-------
|
|
- use_bin_type = True (used by borg since borg 2.0)
|
|
This is used to generate output according to new msgpack 2.0 spec.
|
|
This cleanly keeps bytes and str types apart.
|
|
|
|
- use_bin_type = False (used by borg < 1.3)
|
|
This creates output according to the older msgpack spec.
|
|
BAD: str and bytes were packed into same "raw" representation.
|
|
|
|
- unicode_errors = 'surrogateescape'
|
|
Backup applications are one of the rare cases where this needs to be used.
|
|
It is needed because borg also needs to deal with data that does not cleanly encode/decode using utf-8.
|
|
There is a lot of badly encoded data out there, e.g. in filenames, and as a backup tool, we must preserve it as faithfully as possible.
|
|
|
|
Unpacking
|
|
---------
|
|
- raw = False (used by borg since borg 2.0)
|
|
We already can use this with borg 2.0 due to the type conversion to the desired type in item.py update_internal
|
|
methods. This type conversion code can be removed in future, when we do not have to deal with data any more
|
|
that was packed the old way.
|
|
It will then unpack according to the msgpack 2.0 spec format and directly output bytes or str.
|
|
|
|
- raw = True (the old way, used by borg < 1.3)
|
|
|
|
- unicode_errors = 'surrogateescape' -> see description above (will be used when raw is False).
|
|
|
|
As of borg 2.0, we have fixed most of the msgpack str/bytes mess, #968.
|
|
Borg still needs to **read** old repos, archives, keys, ..., so we cannot yet fix it completely.
|
|
But from now on, borg only **writes** new data according to the new msgpack 2.0 spec,
|
|
thus we can remove some legacy support in a later borg release (some places are marked with "legacy").
|
|
|
|
current way in msgpack terms
|
|
----------------------------
|
|
|
|
- pack with use_bin_type=True (according to msgpack 2.0 spec)
|
|
- packs str -> raw and bytes -> bin
|
|
- unpack with raw=False (according to msgpack 2.0 spec, using unicode_errors='surrogateescape')
|
|
- unpacks bin to bytes and raw to str (thus we need to convert to desired type if we want bytes from "raw")
|
|
"""
|
|
|
|
from .datastruct import StableDict
|
|
from ..constants import * # NOQA
|
|
|
|
from msgpack import Packer as mp_Packer
|
|
from msgpack import packb as mp_packb
|
|
from msgpack import pack as mp_pack
|
|
from msgpack import Unpacker as mp_Unpacker
|
|
from msgpack import unpackb as mp_unpackb
|
|
from msgpack import unpack as mp_unpack
|
|
from msgpack import version as mp_version
|
|
|
|
from msgpack import ExtType, Timestamp
|
|
from msgpack import OutOfData
|
|
|
|
|
|
# Version of the underlying msgpack package, re-exported under this module's namespace.
version = mp_version
|
|
|
|
# Module-wide defaults used by the pack/unpack wrappers below:
# - pack with the bin type enabled,
# - unpack with raw disabled,
# - use the surrogateescape error handler for str data.
USE_BIN_TYPE = True
RAW = False
UNICODE_ERRORS = "surrogateescape"
|
|
|
|
|
|
class PackException(Exception):
    """Raised when packing an object with msgpack fails."""
|
|
|
|
|
|
class UnpackException(Exception):
    """Raised when unpacking msgpack data fails."""
|
|
|
|
|
|
class Packer(mp_Packer):
    """msgpack Packer using this module's defaults.

    All options are keyword-only. ``unicode_errors`` must be left at
    UNICODE_ERRORS (checked by an assertion). Any exception raised while
    packing is re-raised as PackException.
    """

    def __init__(
        self,
        *,
        default=None,
        unicode_errors=UNICODE_ERRORS,
        use_single_float=False,
        autoreset=True,
        use_bin_type=USE_BIN_TYPE,
        strict_types=False,
    ):
        # Only the module-wide error handler is accepted here.
        assert unicode_errors == UNICODE_ERRORS
        super().__init__(
            default=default,
            unicode_errors=unicode_errors,
            use_single_float=use_single_float,
            autoreset=autoreset,
            use_bin_type=use_bin_type,
            strict_types=strict_types,
        )

    def pack(self, obj):
        """Pack *obj* with the underlying msgpack Packer and return its result.

        Raises:
            PackException: if the underlying packer raises any Exception; the
                original error is attached as the explicit cause.
        """
        try:
            return super().pack(obj)
        except Exception as e:
            # Chain explicitly so the traceback shows the msgpack error as the direct cause.
            raise PackException(e) from e
|
|
|
|
|
|
def packb(o, *, use_bin_type=USE_BIN_TYPE, unicode_errors=UNICODE_ERRORS, **kwargs):
    """Pack *o* via msgpack's packb using this module's defaults and return its result.

    ``unicode_errors`` must be left at UNICODE_ERRORS (checked by an assertion).
    Additional keyword arguments are passed through to msgpack unchanged.

    Raises:
        PackException: if msgpack raises any Exception; the original error is
            attached as the explicit cause.
    """
    assert unicode_errors == UNICODE_ERRORS
    try:
        return mp_packb(o, use_bin_type=use_bin_type, unicode_errors=unicode_errors, **kwargs)
    except Exception as e:
        # Chain explicitly so the traceback shows the msgpack error as the direct cause.
        raise PackException(e) from e
|
|
|
|
|
|
def pack(o, stream, *, use_bin_type=USE_BIN_TYPE, unicode_errors=UNICODE_ERRORS, **kwargs):
    """Pack *o* to *stream* via msgpack's pack using this module's defaults.

    ``unicode_errors`` must be left at UNICODE_ERRORS (checked by an assertion).
    Additional keyword arguments are passed through to msgpack unchanged.
    Returns whatever msgpack's pack returns.

    Raises:
        PackException: if msgpack raises any Exception; the original error is
            attached as the explicit cause.
    """
    assert unicode_errors == UNICODE_ERRORS
    try:
        return mp_pack(o, stream, use_bin_type=use_bin_type, unicode_errors=unicode_errors, **kwargs)
    except Exception as e:
        # Chain explicitly so the traceback shows the msgpack error as the direct cause.
        raise PackException(e) from e
|
|
|
|
|
|
class Unpacker(mp_Unpacker):
    """msgpack Unpacker using this module's defaults.

    ``raw`` and ``unicode_errors`` must be left at RAW / UNICODE_ERRORS (checked
    by assertions). Errors raised while unpacking are re-raised as
    UnpackException, except for the end-of-data signals (OutOfData from
    unpack(), StopIteration from iteration), which pass through unchanged.
    """

    def __init__(
        self,
        file_like=None,
        *,
        read_size=0,
        use_list=True,
        raw=RAW,
        object_hook=None,
        object_pairs_hook=None,
        list_hook=None,
        unicode_errors=UNICODE_ERRORS,
        max_buffer_size=0,
        ext_hook=ExtType,
        strict_map_key=False,
    ):
        # Only the module-wide settings are accepted for these two options.
        assert raw == RAW
        assert unicode_errors == UNICODE_ERRORS
        # Everything is handed to the base class by keyword (including file_like).
        super().__init__(
            file_like=file_like,
            read_size=read_size,
            use_list=use_list,
            raw=raw,
            object_hook=object_hook,
            object_pairs_hook=object_pairs_hook,
            list_hook=list_hook,
            unicode_errors=unicode_errors,
            max_buffer_size=max_buffer_size,
            ext_hook=ext_hook,
            strict_map_key=strict_map_key,
        )

    def unpack(self):
        """Unpack and return the next object from the underlying Unpacker.

        Raises:
            OutOfData: passed through unchanged from the underlying Unpacker.
            UnpackException: for any other Exception; the original error is
                attached as the explicit cause.
        """
        try:
            return super().unpack()
        except OutOfData:
            # Not an error in the data - let callers handle it as-is.
            raise
        except Exception as e:
            raise UnpackException(e) from e

    def __next__(self):
        """Return the next unpacked object when iterating.

        Raises:
            StopIteration: passed through unchanged to end iteration.
            UnpackException: for any other Exception; the original error is
                attached as the explicit cause.
        """
        try:
            return super().__next__()
        except StopIteration:
            # Normal end of iteration - must not be wrapped.
            raise
        except Exception as e:
            raise UnpackException(e) from e

    # Alias so that calling .next() gets the same exception wrapping as __next__.
    next = __next__
|
|
|
|
|
|
def unpackb(packed, *, raw=RAW, unicode_errors=UNICODE_ERRORS, strict_map_key=False, **kwargs):
    """Unpack *packed* via msgpack's unpackb using this module's defaults and return the result.

    ``raw`` and ``unicode_errors`` must be left at RAW / UNICODE_ERRORS (checked
    by assertions). Additional keyword arguments are passed through to msgpack.

    Raises:
        UnpackException: if msgpack raises any Exception; the original error is
            attached as the explicit cause.
    """
    assert raw == RAW
    assert unicode_errors == UNICODE_ERRORS
    # Build the options outside the try block: only the msgpack call itself should be wrapped.
    kw = dict(raw=raw, unicode_errors=unicode_errors, strict_map_key=strict_map_key)
    kw.update(kwargs)
    try:
        return mp_unpackb(packed, **kw)
    except Exception as e:
        # Chain explicitly so the traceback shows the msgpack error as the direct cause.
        raise UnpackException(e) from e
|
|
|
|
|
|
def unpack(stream, *, raw=RAW, unicode_errors=UNICODE_ERRORS, strict_map_key=False, **kwargs):
    """Unpack from *stream* via msgpack's unpack using this module's defaults and return the result.

    ``raw`` and ``unicode_errors`` must be left at RAW / UNICODE_ERRORS (checked
    by assertions). Additional keyword arguments are passed through to msgpack.

    Raises:
        UnpackException: if msgpack raises any Exception; the original error is
            attached as the explicit cause.
    """
    assert raw == RAW
    assert unicode_errors == UNICODE_ERRORS
    # Build the options outside the try block: only the msgpack call itself should be wrapped.
    kw = dict(raw=raw, unicode_errors=unicode_errors, strict_map_key=strict_map_key)
    kw.update(kwargs)
    try:
        return mp_unpack(stream, **kw)
    except Exception as e:
        # Chain explicitly so the traceback shows the msgpack error as the direct cause.
        raise UnpackException(e) from e
|
|
|
|
|
|
# msgpacking related utilities -----------------------------------------------
|
|
|
|
|
|
def is_slow_msgpack():
    """Return True if msgpack.Packer is the Packer class from msgpack.fallback."""
    import msgpack
    import msgpack.fallback

    fallback_packer = msgpack.fallback.Packer
    return msgpack.Packer is fallback_packer
|
|
|
|
|
|
def is_supported_msgpack():
    """Return True if the installed msgpack version is within the accepted range and not deny-listed."""
    # DO NOT CHANGE OR REMOVE! See also requirements and comments in pyproject.toml.
    import msgpack

    installed = msgpack.version
    if installed in []:  # < add bad releases here to deny list
        return False
    return (1, 0, 3) <= installed <= (1, 0, 8)
|
|
|
|
|
|
def get_limited_unpacker(kind):
    """Return a limited Unpacker because we should not trust msgpack data received from remote.

    *kind* selects the configuration:
    - "server" / "client": use_list=False (return tuples, not lists).
    - "manifest" / "archive" / "key": use_list=True and object_hook=StableDict.

    Raises ValueError for any other *kind*.
    """
    # Note: msgpack >= 0.6.1 auto-computes DoS-safe max values from len(data) for
    # unpack(data) or from max_buffer_size for Unpacker(max_buffer_size=N).
    buffer_limit = 3 * max(BUFSIZE, MAX_OBJECT_SIZE)

    if kind in ("server", "client"):
        # nothing special: return tuples, not lists
        return Unpacker(use_list=False, max_buffer_size=buffer_limit)

    if kind in ("manifest", "archive", "key"):
        # use_list=True is the default value
        return Unpacker(use_list=True, max_buffer_size=buffer_limit, object_hook=StableDict)

    raise ValueError('kind must be "server", "client", "manifest", "archive" or "key"')
|
|
|
|
|
|
def int_to_timestamp(ns):
    """Build a msgpack Timestamp from *ns* via Timestamp.from_unix_nano; *ns* must be an int."""
    is_integer = isinstance(ns, int)
    assert is_integer
    return Timestamp.from_unix_nano(ns)
|
|
|
|
|
|
def timestamp_to_int(ts):
    """Return ts.to_unix_nano() for a msgpack Timestamp *ts*; *ts* must be a Timestamp instance."""
    is_timestamp = isinstance(ts, Timestamp)
    assert is_timestamp
    return ts.to_unix_nano()
|