mirror of
https://github.com/borgbackup/borg.git
synced 2025-01-30 19:21:17 +00:00
fix resync and msgpacked item qualifier, fixes #1135
When trying to resync and skip invalid data, borg tries to qualify a byte sequence as valid-looking msgpacked item metadata dict (or not) before even invoking msgpack's unpack. Besides the previously hard-to-understand code, there were 2 issues: - a missing check for map16 - this type is what msgpack uses if the dict has more than 15 items (could happen in the future, not for 1.0.x). - missing checks for str8/16/32 - str16 is what msgpack uses if the bytestring has more than 31 bytes (borg does not have key names that long, thus this wasn't causing any harm). This misqualification (valid data considered invalid) could lead to a wrong resync that skips valid items. Added more comments and tests.
This commit is contained in:
parent
a8c4c9ffab
commit
918e0b2a52
2 changed files with 64 additions and 11 deletions
|
@ -592,6 +592,34 @@ def _open_rb(path):
|
|||
b'xattrs', b'bsdflags', b'acl_nfs4', b'acl_access', b'acl_default', b'acl_extended', ])
|
||||
|
||||
|
||||
def valid_msgpacked_item(d, item_keys_serialized):
    """Heuristically check whether the bytes <d> start with a msgpacked item metadata dict.

    <d> is a raw byte sequence; <item_keys_serialized> is a list of msgpack-serialized
    known item key names. Returns True only if <d> begins with a msgpack map whose
    first key is a bytestring matching one of the known key names.
    """
    if not d:
        return False
    first = d[0]
    # Determine the size of the map header; anything that is not a map
    # cannot be an item metadata dict.
    if first & 0xf0 == 0x80:   # fixmap (up to 15 elements)
        header_len = 1
    elif first == 0xde:        # map16 (up to 2^16-1 elements)
        header_len = 3
    else:
        # not a map (dict) at all.
        # note: item dicts must never have > 2^16-1 elements, so map32
        # is intentionally not accepted here.
        return False
    if len(d) <= header_len:
        return False
    # The first dict key must be a bytestring.
    key_type = d[header_len]
    is_fixstr = key_type & 0xe0 == 0xa0           # small bytestring (up to 31 chars)
    is_long_str = key_type in (0xd9, 0xda, 0xdb)  # str8, str16 or str32
    if not (is_fixstr or is_long_str):
        return False
    # Finally, the serialized key must be one of the expected key names.
    remainder = d[header_len:]
    for pattern in item_keys_serialized:
        if remainder.startswith(pattern):
            return True
    return False
|
||||
|
||||
|
||||
class RobustUnpacker:
|
||||
"""A restartable/robust version of the streaming msgpack unpacker
|
||||
"""
|
||||
|
@ -622,18 +650,10 @@ def __next__(self):
|
|||
while self._resync:
|
||||
if not data:
|
||||
raise StopIteration
|
||||
# Abort early if the data does not look like a serialized dict
|
||||
if len(data) < 2 or ((data[0] & 0xf0) != 0x80) or ((data[1] & 0xe0) != 0xa0):
|
||||
# Abort early if the data does not look like a serialized item dict
|
||||
if not valid_msgpacked_item(data, self.item_keys):
|
||||
data = data[1:]
|
||||
continue
|
||||
# Make sure it looks like an item dict
|
||||
for pattern in self.item_keys:
|
||||
if data[1:].startswith(pattern):
|
||||
break
|
||||
else:
|
||||
data = data[1:]
|
||||
continue
|
||||
|
||||
self._unpacker = msgpack.Unpacker(object_hook=StableDict)
|
||||
self._unpacker.feed(data)
|
||||
try:
|
||||
|
|
|
@ -2,8 +2,9 @@
|
|||
from unittest.mock import Mock
|
||||
|
||||
import msgpack
|
||||
import pytest
|
||||
|
||||
from ..archive import Archive, CacheChunkBuffer, RobustUnpacker
|
||||
from ..archive import Archive, CacheChunkBuffer, RobustUnpacker, valid_msgpacked_item, ITEM_KEYS
|
||||
from ..key import PlaintextKey
|
||||
from ..helpers import Manifest
|
||||
from . import BaseTestCase
|
||||
|
@ -112,3 +113,35 @@ def test_corrupt_chunk(self):
|
|||
input = [(False, chunks[:3]), (True, [b'gar', b'bage'] + chunks[3:])]
|
||||
result = self.process(input)
|
||||
self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'boo'}, {b'path': b'baz'}])
|
||||
|
||||
|
||||
@pytest.fixture
def item_keys_serialized():
    """The known item key names, each serialized with msgpack."""
    return list(map(msgpack.packb, ITEM_KEYS))
|
||||
|
||||
|
||||
# byte sequences that must NOT qualify as a msgpacked item metadata dict:
# raw junk plus msgpack encodings of non-dicts and of dicts without known keys
_invalid_objects = ([None, 0, 0.0, False, '', {}, [], ()] +
                    [42, 23.42, True, b'foobar', {b'foo': b'bar'}, [b'foo', b'bar'], (b'foo', b'bar')])


@pytest.mark.parametrize('packed',
                         [b'', b'x', b'foobar', ] +
                         [msgpack.packb(obj) for obj in _invalid_objects])
def test_invalid_msgpacked_item(packed, item_keys_serialized):
    """none of these byte sequences qualifies as an item metadata dict"""
    assert not valid_msgpacked_item(packed, item_keys_serialized)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('packed',
                         [msgpack.packb(obj) for obj in [
                             {b'path': b'/a/b/c'},                    # small (different msgpack mapping type!)
                             {k: b'' for k in ITEM_KEYS},             # as big (key count) as it gets
                             {k: b'x' * 1000 for k in ITEM_KEYS},     # as big (key count and volume) as it gets
                         ]])
def test_valid_msgpacked_items(packed, item_keys_serialized):
    """all of these serialized dicts qualify as item metadata dicts"""
    assert valid_msgpacked_item(packed, item_keys_serialized)
|
||||
|
||||
|
||||
def test_key_length_msgpacked_items():
    """keys longer than 31 bytes leave the fixstr range — the qualifier must still accept them"""
    long_key = b'x' * 32  # 31 bytes is the limit for the fixstr msgpack type
    serialized_keys = [msgpack.packb(long_key)]
    packed_dict = msgpack.packb({long_key: b''})
    assert valid_msgpacked_item(packed_dict, serialized_keys)
|
||||
|
|
Loading…
Reference in a new issue