diff --git a/attic/archive.py b/attic/archive.py index 850970e78..07652e2e5 100644 --- a/attic/archive.py +++ b/attic/archive.py @@ -431,6 +431,10 @@ class RobustUnpacker(): while self._resync: if not data: raise StopIteration + # Abort early if the data does not look like a serialized item + if len(data) < 2 or ((data[0] & 0xf0) != 0x80) or ((data[1] & 0xe0) != 0xa0) or not b'\xa4path' in data: + data = data[1:] + continue self._unpacker = msgpack.Unpacker(object_hook=StableDict) self._unpacker.feed(data) try: diff --git a/attic/testsuite/archive.py b/attic/testsuite/archive.py index 79b639530..8d478f5f2 100644 --- a/attic/testsuite/archive.py +++ b/attic/testsuite/archive.py @@ -35,10 +35,10 @@ class ChunkBufferTestCase(AtticTestCase): class RobustUnpackerTestCase(AtticTestCase): def make_chunks(self, items): - return b''.join(msgpack.packb(item) for item in items) + return b''.join(msgpack.packb({'path': item}) for item in items) def _validator(self, value): - return value in (b'foo', b'bar', b'boo', b'baz') + return isinstance(value, dict) and value.get(b'path') in (b'foo', b'bar', b'boo', b'baz') def process(self, input): unpacker = RobustUnpacker(validator=self._validator) @@ -56,7 +56,11 @@ class RobustUnpackerTestCase(AtticTestCase): chunks = [(False, [self.make_chunks([b'foo', b'bar'])]), (False, [b'garbage'] + [self.make_chunks([b'boo', b'baz'])])] result = self.process(chunks) - self.assert_equal(result, [b'foo', b'bar', 103, 97, 114, 98, 97, 103, 101, b'boo', b'baz']) + self.assert_equal(result, [ + {b'path': b'foo'}, {b'path': b'bar'}, + 103, 97, 114, 98, 97, 103, 101, + {b'path': b'boo'}, + {b'path': b'baz'}]) def split(self, left, length): parts = [] @@ -69,16 +73,16 @@ class RobustUnpackerTestCase(AtticTestCase): chunks = self.split(self.make_chunks([b'foo', b'bar', b'boo', b'baz']), 2) input = [(False, chunks)] result = self.process(input) - self.assert_equal(result, [b'foo', b'bar', b'boo', b'baz']) + self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'bar'}, {b'path': b'boo'}, {b'path': b'baz'}]) def test_missing_chunk(self): - chunks = self.split(self.make_chunks([b'foo', b'bar', b'boo', b'baz']), 2) + chunks = self.split(self.make_chunks([b'foo', b'bar', b'boo', b'baz']), 4) input = [(False, chunks[:3]), (True, chunks[4:])] result = self.process(input) - self.assert_equal(result, [b'foo', b'boo', b'baz']) + self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'boo'}, {b'path': b'baz'}]) def test_corrupt_chunk(self): - chunks = self.split(self.make_chunks([b'foo', b'bar', b'boo', b'baz']), 2) + chunks = self.split(self.make_chunks([b'foo', b'bar', b'boo', b'baz']), 4) input = [(False, chunks[:3]), (True, [b'gar', b'bage'] + chunks[3:])] result = self.process(input) - self.assert_equal(result, [b'foo', b'boo', b'baz']) + self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'boo'}, {b'path': b'baz'}])