Merge pull request #1149 from ThomasWaldmann/validkeys-in-manifest

item_keys in manifest, validate item keys
This commit is contained in:
enkore 2016-06-12 14:44:27 +02:00 committed by GitHub
commit 90d621ce35
3 changed files with 31 additions and 10 deletions

View File

@ -224,7 +224,7 @@ Number of files: {0.stats.nfiles}'''.format(
yield item yield item
def add_item(self, item): def add_item(self, item):
unknown_keys = set(item) - ITEM_KEYS unknown_keys = set(item) - self.manifest.item_keys
assert not unknown_keys, ('unknown item metadata keys detected, please update ITEM_KEYS: %s', assert not unknown_keys, ('unknown item metadata keys detected, please update ITEM_KEYS: %s',
','.join(k.decode('ascii') for k in unknown_keys)) ','.join(k.decode('ascii') for k in unknown_keys))
if self.show_progress: if self.show_progress:
@ -587,10 +587,13 @@ Number of files: {0.stats.nfiles}'''.format(
# this set must be kept complete, otherwise the RobustUnpacker might malfunction: # this set must be kept complete, otherwise the RobustUnpacker might malfunction:
ITEM_KEYS = set([b'path', b'source', b'rdev', b'chunks', ITEM_KEYS = frozenset([b'path', b'source', b'rdev', b'chunks',
b'mode', b'user', b'group', b'uid', b'gid', b'mtime', b'atime', b'ctime', b'mode', b'user', b'group', b'uid', b'gid', b'mtime', b'atime', b'ctime',
b'xattrs', b'bsdflags', b'acl_nfs4', b'acl_access', b'acl_default', b'acl_extended', ]) b'xattrs', b'bsdflags', b'acl_nfs4', b'acl_access', b'acl_default', b'acl_extended', ])
# this is the set of keys that are always present in items:
REQUIRED_ITEM_KEYS = frozenset([b'path', b'mtime', ])
def valid_msgpacked_item(d, item_keys_serialized): def valid_msgpacked_item(d, item_keys_serialized):
"""check if the data <d> looks like a msgpacked item metadata dict""" """check if the data <d> looks like a msgpacked item metadata dict"""
@ -623,9 +626,9 @@ def valid_msgpacked_item(d, item_keys_serialized):
class RobustUnpacker: class RobustUnpacker:
"""A restartable/robust version of the streaming msgpack unpacker """A restartable/robust version of the streaming msgpack unpacker
""" """
def __init__(self, validator): def __init__(self, validator, item_keys):
super().__init__() super().__init__()
self.item_keys = [msgpack.packb(name) for name in ITEM_KEYS] self.item_keys = [msgpack.packb(name) for name in item_keys]
self.validator = validator self.validator = validator
self._buffered_data = [] self._buffered_data = []
self._resync = False self._resync = False
@ -729,6 +732,11 @@ class ArchiveChecker:
Iterates through all objects in the repository looking for archive metadata blocks. Iterates through all objects in the repository looking for archive metadata blocks.
""" """
logger.info('Rebuilding missing manifest, this might take some time...') logger.info('Rebuilding missing manifest, this might take some time...')
# as we have lost the manifest, we do not know any more what valid item keys we had.
# collecting any key we encounter in a damaged repo seems unwise, thus we just use
# the hardcoded list from the source code. thus, it is not recommended to rebuild a
# lost manifest on an older borg version than the most recent one that was ever used
# within this repository (assuming that newer borg versions support more item keys).
manifest = Manifest(self.key, self.repository) manifest = Manifest(self.key, self.repository)
for chunk_id, _ in self.chunks.iteritems(): for chunk_id, _ in self.chunks.iteritems():
cdata = self.repository.get(chunk_id) cdata = self.repository.get(chunk_id)
@ -806,7 +814,8 @@ class ArchiveChecker:
Missing item chunks will be skipped and the msgpack stream will be restarted Missing item chunks will be skipped and the msgpack stream will be restarted
""" """
unpacker = RobustUnpacker(lambda item: isinstance(item, dict) and b'path' in item) item_keys = self.manifest.item_keys
unpacker = RobustUnpacker(lambda item: isinstance(item, dict) and b'path' in item, item_keys)
_state = 0 _state = 0
def missing_chunk_detector(chunk_id): def missing_chunk_detector(chunk_id):
@ -821,6 +830,12 @@ class ArchiveChecker:
self.error_found = True self.error_found = True
logger.error(msg) logger.error(msg)
def valid_item(obj):
if not isinstance(obj, StableDict):
return False
keys = set(obj)
return REQUIRED_ITEM_KEYS.issubset(keys) and keys.issubset(item_keys)
i = 0 i = 0
for state, items in groupby(archive[b'items'], missing_chunk_detector): for state, items in groupby(archive[b'items'], missing_chunk_detector):
items = list(items) items = list(items)
@ -835,7 +850,7 @@ class ArchiveChecker:
unpacker.feed(self.key.decrypt(chunk_id, cdata)) unpacker.feed(self.key.decrypt(chunk_id, cdata))
try: try:
for item in unpacker: for item in unpacker:
if isinstance(item, dict): if valid_item(item):
yield item yield item
else: else:
report('Did not get expected metadata dict when unpacking item metadata', chunk_id, i) report('Did not get expected metadata dict when unpacking item metadata', chunk_id, i)

View File

@ -85,16 +85,19 @@ class Manifest:
MANIFEST_ID = b'\0' * 32 MANIFEST_ID = b'\0' * 32
def __init__(self, key, repository): def __init__(self, key, repository, item_keys=None):
from .archive import ITEM_KEYS
self.archives = {} self.archives = {}
self.config = {} self.config = {}
self.key = key self.key = key
self.repository = repository self.repository = repository
self.item_keys = frozenset(item_keys) if item_keys is not None else ITEM_KEYS
@classmethod @classmethod
def load(cls, repository, key=None): def load(cls, repository, key=None):
from .key import key_factory from .key import key_factory
from .repository import Repository from .repository import Repository
from .archive import ITEM_KEYS
try: try:
cdata = repository.get(cls.MANIFEST_ID) cdata = repository.get(cls.MANIFEST_ID)
except Repository.ObjectNotFound: except Repository.ObjectNotFound:
@ -112,6 +115,8 @@ class Manifest:
if manifest.timestamp: if manifest.timestamp:
manifest.timestamp = manifest.timestamp.decode('ascii') manifest.timestamp = manifest.timestamp.decode('ascii')
manifest.config = m[b'config'] manifest.config = m[b'config']
# valid item keys are whatever is known in the repo or every key we know
manifest.item_keys = frozenset(m.get(b'item_keys', [])) | ITEM_KEYS
return manifest, key return manifest, key
def write(self): def write(self):
@ -121,6 +126,7 @@ class Manifest:
'archives': self.archives, 'archives': self.archives,
'timestamp': self.timestamp, 'timestamp': self.timestamp,
'config': self.config, 'config': self.config,
'item_keys': tuple(self.item_keys),
})) }))
self.id = self.key.id_hash(data) self.id = self.key.id_hash(data)
self.repository.put(self.MANIFEST_ID, self.key.encrypt(data)) self.repository.put(self.MANIFEST_ID, self.key.encrypt(data))

View File

@ -68,7 +68,7 @@ class RobustUnpackerTestCase(BaseTestCase):
return isinstance(value, dict) and value.get(b'path') in (b'foo', b'bar', b'boo', b'baz') return isinstance(value, dict) and value.get(b'path') in (b'foo', b'bar', b'boo', b'baz')
def process(self, input): def process(self, input):
unpacker = RobustUnpacker(validator=self._validator) unpacker = RobustUnpacker(validator=self._validator, item_keys=ITEM_KEYS)
result = [] result = []
for should_sync, chunks in input: for should_sync, chunks in input:
if should_sync: if should_sync: