fix resync and msgpacked item qualifier, fixes #1135

when trying to resync and skip invalid data, borg tries to qualify a byte sequence as
valid-looking msgpacked item metadata dict (or not) before even invoking msgpack's unpack.

besides previously hard to understand code, there were 2 issues:

- a missing check for map16 - this type is what msgpack uses if the dict has more than
15 items (could happen in future, not for 1.0.x).

- missing checks for str8/16/32 - str16 is what msgpack uses if the bytestring has more than 31 bytes
(borg does not have that long key names, thus this wasn't causing any harm)

this misqualification (valid data considered invalid) could lead to a wrong resync, skipping valid items.

added more comments and tests.
This commit is contained in:
Thomas Waldmann 2016-06-09 21:08:39 +02:00
parent a8c4c9ffab
commit 918e0b2a52
2 changed files with 64 additions and 11 deletions

View File

@ -592,6 +592,34 @@ ITEM_KEYS = set([b'path', b'source', b'rdev', b'chunks',
b'xattrs', b'bsdflags', b'acl_nfs4', b'acl_access', b'acl_default', b'acl_extended', ])
def valid_msgpacked_item(d, item_keys_serialized):
"""check if the data <d> looks like a msgpacked item metadata dict"""
d_len = len(d)
if d_len == 0:
return False
if d[0] & 0xf0 == 0x80: # object is a fixmap (up to 15 elements)
offs = 1
elif d[0] == 0xde: # object is a map16 (up to 2^16-1 elements)
offs = 3
else:
# object is not a map (dict)
# note: we must not have item dicts with > 2^16-1 elements
return False
if d_len <= offs:
return False
# is the first dict key a bytestring?
if d[offs] & 0xe0 == 0xa0: # key is a small bytestring (up to 31 chars)
pass
elif d[offs] in (0xd9, 0xda, 0xdb): # key is a str8, str16 or str32
pass
else:
# key is not a bytestring
return False
# is the bytestring any of the expected key names?
key_serialized = d[offs:]
return any(key_serialized.startswith(pattern) for pattern in item_keys_serialized)
class RobustUnpacker:
"""A restartable/robust version of the streaming msgpack unpacker
"""
@ -622,18 +650,10 @@ class RobustUnpacker:
while self._resync:
if not data:
raise StopIteration
# Abort early if the data does not look like a serialized dict
if len(data) < 2 or ((data[0] & 0xf0) != 0x80) or ((data[1] & 0xe0) != 0xa0):
# Abort early if the data does not look like a serialized item dict
if not valid_msgpacked_item(data, self.item_keys):
data = data[1:]
continue
# Make sure it looks like an item dict
for pattern in self.item_keys:
if data[1:].startswith(pattern):
break
else:
data = data[1:]
continue
self._unpacker = msgpack.Unpacker(object_hook=StableDict)
self._unpacker.feed(data)
try:

View File

@ -2,8 +2,9 @@ from datetime import datetime, timezone
from unittest.mock import Mock
import msgpack
import pytest
from ..archive import Archive, CacheChunkBuffer, RobustUnpacker
from ..archive import Archive, CacheChunkBuffer, RobustUnpacker, valid_msgpacked_item, ITEM_KEYS
from ..key import PlaintextKey
from ..helpers import Manifest
from . import BaseTestCase
@ -112,3 +113,35 @@ class RobustUnpackerTestCase(BaseTestCase):
input = [(False, chunks[:3]), (True, [b'gar', b'bage'] + chunks[3:])]
result = self.process(input)
self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'boo'}, {b'path': b'baz'}])
@pytest.fixture
def item_keys_serialized():
return [msgpack.packb(name) for name in ITEM_KEYS]
@pytest.mark.parametrize('packed',
[b'', b'x', b'foobar', ] +
[msgpack.packb(o) for o in (
[None, 0, 0.0, False, '', {}, [], ()] +
[42, 23.42, True, b'foobar', {b'foo': b'bar'}, [b'foo', b'bar'], (b'foo', b'bar')]
)])
def test_invalid_msgpacked_item(packed, item_keys_serialized):
assert not valid_msgpacked_item(packed, item_keys_serialized)
@pytest.mark.parametrize('packed',
[msgpack.packb(o) for o in [
{b'path': b'/a/b/c'}, # small (different msgpack mapping type!)
dict((k, b'') for k in ITEM_KEYS), # as big (key count) as it gets
dict((k, b'x' * 1000) for k in ITEM_KEYS), # as big (key count and volume) as it gets
]])
def test_valid_msgpacked_items(packed, item_keys_serialized):
assert valid_msgpacked_item(packed, item_keys_serialized)
def test_key_length_msgpacked_items():
key = b'x' * 32 # 31 bytes is the limit for fixstr msgpack type
data = {key: b''}
item_keys_serialized = [msgpack.packb(key), ]
assert valid_msgpacked_item(msgpack.packb(data), item_keys_serialized)