store item_keys into manifest, fixes #1147

We need a list of valid item metadata keys. Using a list stored in the repo manifest
is more future-proof than the hardcoded ITEM_KEYS in the source code.

Keys that are in union(item_keys_from_repo, item_keys_from_source) are considered valid.
This commit is contained in:
Thomas Waldmann 2016-06-09 23:38:12 +02:00
parent d9b77d57c8
commit 78121a8d04
3 changed files with 21 additions and 9 deletions

View File

@ -224,7 +224,7 @@ Number of files: {0.stats.nfiles}'''.format(
yield item yield item
def add_item(self, item): def add_item(self, item):
unknown_keys = set(item) - ITEM_KEYS unknown_keys = set(item) - self.manifest.item_keys
assert not unknown_keys, ('unknown item metadata keys detected, please update ITEM_KEYS: %s', assert not unknown_keys, ('unknown item metadata keys detected, please update ITEM_KEYS: %s',
','.join(k.decode('ascii') for k in unknown_keys)) ','.join(k.decode('ascii') for k in unknown_keys))
if self.show_progress: if self.show_progress:
@ -587,9 +587,9 @@ Number of files: {0.stats.nfiles}'''.format(
# this set must be kept complete, otherwise the RobustUnpacker might malfunction: # this set must be kept complete, otherwise the RobustUnpacker might malfunction:
ITEM_KEYS = set([b'path', b'source', b'rdev', b'chunks', ITEM_KEYS = frozenset([b'path', b'source', b'rdev', b'chunks',
b'mode', b'user', b'group', b'uid', b'gid', b'mtime', b'atime', b'ctime', b'mode', b'user', b'group', b'uid', b'gid', b'mtime', b'atime', b'ctime',
b'xattrs', b'bsdflags', b'acl_nfs4', b'acl_access', b'acl_default', b'acl_extended', ]) b'xattrs', b'bsdflags', b'acl_nfs4', b'acl_access', b'acl_default', b'acl_extended', ])
def valid_msgpacked_item(d, item_keys_serialized): def valid_msgpacked_item(d, item_keys_serialized):
@ -623,9 +623,9 @@ def valid_msgpacked_item(d, item_keys_serialized):
class RobustUnpacker: class RobustUnpacker:
"""A restartable/robust version of the streaming msgpack unpacker """A restartable/robust version of the streaming msgpack unpacker
""" """
def __init__(self, validator): def __init__(self, validator, item_keys):
super().__init__() super().__init__()
self.item_keys = [msgpack.packb(name) for name in ITEM_KEYS] self.item_keys = [msgpack.packb(name) for name in item_keys]
self.validator = validator self.validator = validator
self._buffered_data = [] self._buffered_data = []
self._resync = False self._resync = False
@ -729,6 +729,11 @@ class ArchiveChecker:
Iterates through all objects in the repository looking for archive metadata blocks. Iterates through all objects in the repository looking for archive metadata blocks.
""" """
logger.info('Rebuilding missing manifest, this might take some time...') logger.info('Rebuilding missing manifest, this might take some time...')
# as we have lost the manifest, we do not know any more what valid item keys we had.
# collecting any key we encounter in a damaged repo seems unwise, thus we just use
# the hardcoded list from the source code. thus, it is not recommended to rebuild a
lost manifest on an older borg version than the most recent one that was ever used
# within this repository (assuming that newer borg versions support more item keys).
manifest = Manifest(self.key, self.repository) manifest = Manifest(self.key, self.repository)
for chunk_id, _ in self.chunks.iteritems(): for chunk_id, _ in self.chunks.iteritems():
cdata = self.repository.get(chunk_id) cdata = self.repository.get(chunk_id)
@ -806,7 +811,8 @@ class ArchiveChecker:
Missing item chunks will be skipped and the msgpack stream will be restarted Missing item chunks will be skipped and the msgpack stream will be restarted
""" """
unpacker = RobustUnpacker(lambda item: isinstance(item, dict) and b'path' in item) unpacker = RobustUnpacker(lambda item: isinstance(item, dict) and b'path' in item,
self.manifest.item_keys)
_state = 0 _state = 0
def missing_chunk_detector(chunk_id): def missing_chunk_detector(chunk_id):

View File

@ -85,16 +85,19 @@ class Manifest:
MANIFEST_ID = b'\0' * 32 MANIFEST_ID = b'\0' * 32
def __init__(self, key, repository): def __init__(self, key, repository, item_keys=None):
from .archive import ITEM_KEYS
self.archives = {} self.archives = {}
self.config = {} self.config = {}
self.key = key self.key = key
self.repository = repository self.repository = repository
self.item_keys = frozenset(item_keys) if item_keys is not None else ITEM_KEYS
@classmethod @classmethod
def load(cls, repository, key=None): def load(cls, repository, key=None):
from .key import key_factory from .key import key_factory
from .repository import Repository from .repository import Repository
from .archive import ITEM_KEYS
try: try:
cdata = repository.get(cls.MANIFEST_ID) cdata = repository.get(cls.MANIFEST_ID)
except Repository.ObjectNotFound: except Repository.ObjectNotFound:
@ -112,6 +115,8 @@ class Manifest:
if manifest.timestamp: if manifest.timestamp:
manifest.timestamp = manifest.timestamp.decode('ascii') manifest.timestamp = manifest.timestamp.decode('ascii')
manifest.config = m[b'config'] manifest.config = m[b'config']
# valid item keys are whatever is known in the repo or every key we know
manifest.item_keys = frozenset(m.get(b'item_keys', [])) | ITEM_KEYS
return manifest, key return manifest, key
def write(self): def write(self):
@ -121,6 +126,7 @@ class Manifest:
'archives': self.archives, 'archives': self.archives,
'timestamp': self.timestamp, 'timestamp': self.timestamp,
'config': self.config, 'config': self.config,
'item_keys': tuple(self.item_keys),
})) }))
self.id = self.key.id_hash(data) self.id = self.key.id_hash(data)
self.repository.put(self.MANIFEST_ID, self.key.encrypt(data)) self.repository.put(self.MANIFEST_ID, self.key.encrypt(data))

View File

@ -68,7 +68,7 @@ class RobustUnpackerTestCase(BaseTestCase):
return isinstance(value, dict) and value.get(b'path') in (b'foo', b'bar', b'boo', b'baz') return isinstance(value, dict) and value.get(b'path') in (b'foo', b'bar', b'boo', b'baz')
def process(self, input): def process(self, input):
unpacker = RobustUnpacker(validator=self._validator) unpacker = RobustUnpacker(validator=self._validator, item_keys=ITEM_KEYS)
result = [] result = []
for should_sync, chunks in input: for should_sync, chunks in input:
if should_sync: if should_sync: