mirror of
https://github.com/borgbackup/borg.git
synced 2025-03-16 17:06:07 +00:00
store item_keys into manifest, fixes #1147
We need a list of valid item metadata keys. Using a list stored in the repo manifest is more future-proof than the hardcoded ITEM_KEYS in the source code. Keys that are in union(item_keys_from_repo, item_keys_from_source) are considered valid.
This commit is contained in:
parent
d9b77d57c8
commit
78121a8d04
3 changed files with 21 additions and 9 deletions
|
@ -224,7 +224,7 @@ Number of files: {0.stats.nfiles}'''.format(
|
||||||
yield item
|
yield item
|
||||||
|
|
||||||
def add_item(self, item):
|
def add_item(self, item):
|
||||||
unknown_keys = set(item) - ITEM_KEYS
|
unknown_keys = set(item) - self.manifest.item_keys
|
||||||
assert not unknown_keys, ('unknown item metadata keys detected, please update ITEM_KEYS: %s',
|
assert not unknown_keys, ('unknown item metadata keys detected, please update ITEM_KEYS: %s',
|
||||||
','.join(k.decode('ascii') for k in unknown_keys))
|
','.join(k.decode('ascii') for k in unknown_keys))
|
||||||
if self.show_progress:
|
if self.show_progress:
|
||||||
|
@ -587,7 +587,7 @@ Number of files: {0.stats.nfiles}'''.format(
|
||||||
|
|
||||||
|
|
||||||
# this set must be kept complete, otherwise the RobustUnpacker might malfunction:
|
# this set must be kept complete, otherwise the RobustUnpacker might malfunction:
|
||||||
ITEM_KEYS = set([b'path', b'source', b'rdev', b'chunks',
|
ITEM_KEYS = frozenset([b'path', b'source', b'rdev', b'chunks',
|
||||||
b'mode', b'user', b'group', b'uid', b'gid', b'mtime', b'atime', b'ctime',
|
b'mode', b'user', b'group', b'uid', b'gid', b'mtime', b'atime', b'ctime',
|
||||||
b'xattrs', b'bsdflags', b'acl_nfs4', b'acl_access', b'acl_default', b'acl_extended', ])
|
b'xattrs', b'bsdflags', b'acl_nfs4', b'acl_access', b'acl_default', b'acl_extended', ])
|
||||||
|
|
||||||
|
@ -623,9 +623,9 @@ def valid_msgpacked_item(d, item_keys_serialized):
|
||||||
class RobustUnpacker:
|
class RobustUnpacker:
|
||||||
"""A restartable/robust version of the streaming msgpack unpacker
|
"""A restartable/robust version of the streaming msgpack unpacker
|
||||||
"""
|
"""
|
||||||
def __init__(self, validator):
|
def __init__(self, validator, item_keys):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.item_keys = [msgpack.packb(name) for name in ITEM_KEYS]
|
self.item_keys = [msgpack.packb(name) for name in item_keys]
|
||||||
self.validator = validator
|
self.validator = validator
|
||||||
self._buffered_data = []
|
self._buffered_data = []
|
||||||
self._resync = False
|
self._resync = False
|
||||||
|
@ -729,6 +729,11 @@ class ArchiveChecker:
|
||||||
Iterates through all objects in the repository looking for archive metadata blocks.
|
Iterates through all objects in the repository looking for archive metadata blocks.
|
||||||
"""
|
"""
|
||||||
logger.info('Rebuilding missing manifest, this might take some time...')
|
logger.info('Rebuilding missing manifest, this might take some time...')
|
||||||
|
# as we have lost the manifest, we do not know any more what valid item keys we had.
|
||||||
|
# collecting any key we encounter in a damaged repo seems unwise, thus we just use
|
||||||
|
# the hardcoded list from the source code. thus, it is not recommended to rebuild a
|
||||||
|
# lost manifest on an older borg version than the most recent one that was ever used
|
||||||
|
# within this repository (assuming that newer borg versions support more item keys).
|
||||||
manifest = Manifest(self.key, self.repository)
|
manifest = Manifest(self.key, self.repository)
|
||||||
for chunk_id, _ in self.chunks.iteritems():
|
for chunk_id, _ in self.chunks.iteritems():
|
||||||
cdata = self.repository.get(chunk_id)
|
cdata = self.repository.get(chunk_id)
|
||||||
|
@ -806,7 +811,8 @@ class ArchiveChecker:
|
||||||
|
|
||||||
Missing item chunks will be skipped and the msgpack stream will be restarted
|
Missing item chunks will be skipped and the msgpack stream will be restarted
|
||||||
"""
|
"""
|
||||||
unpacker = RobustUnpacker(lambda item: isinstance(item, dict) and b'path' in item)
|
unpacker = RobustUnpacker(lambda item: isinstance(item, dict) and b'path' in item,
|
||||||
|
self.manifest.item_keys)
|
||||||
_state = 0
|
_state = 0
|
||||||
|
|
||||||
def missing_chunk_detector(chunk_id):
|
def missing_chunk_detector(chunk_id):
|
||||||
|
|
|
@ -85,16 +85,19 @@ class Manifest:
|
||||||
|
|
||||||
MANIFEST_ID = b'\0' * 32
|
MANIFEST_ID = b'\0' * 32
|
||||||
|
|
||||||
def __init__(self, key, repository):
|
def __init__(self, key, repository, item_keys=None):
|
||||||
|
from .archive import ITEM_KEYS
|
||||||
self.archives = {}
|
self.archives = {}
|
||||||
self.config = {}
|
self.config = {}
|
||||||
self.key = key
|
self.key = key
|
||||||
self.repository = repository
|
self.repository = repository
|
||||||
|
self.item_keys = frozenset(item_keys) if item_keys is not None else ITEM_KEYS
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def load(cls, repository, key=None):
|
def load(cls, repository, key=None):
|
||||||
from .key import key_factory
|
from .key import key_factory
|
||||||
from .repository import Repository
|
from .repository import Repository
|
||||||
|
from .archive import ITEM_KEYS
|
||||||
try:
|
try:
|
||||||
cdata = repository.get(cls.MANIFEST_ID)
|
cdata = repository.get(cls.MANIFEST_ID)
|
||||||
except Repository.ObjectNotFound:
|
except Repository.ObjectNotFound:
|
||||||
|
@ -112,6 +115,8 @@ class Manifest:
|
||||||
if manifest.timestamp:
|
if manifest.timestamp:
|
||||||
manifest.timestamp = manifest.timestamp.decode('ascii')
|
manifest.timestamp = manifest.timestamp.decode('ascii')
|
||||||
manifest.config = m[b'config']
|
manifest.config = m[b'config']
|
||||||
|
# valid item keys are whatever is known in the repo or every key we know
|
||||||
|
manifest.item_keys = frozenset(m.get(b'item_keys', [])) | ITEM_KEYS
|
||||||
return manifest, key
|
return manifest, key
|
||||||
|
|
||||||
def write(self):
|
def write(self):
|
||||||
|
@ -121,6 +126,7 @@ class Manifest:
|
||||||
'archives': self.archives,
|
'archives': self.archives,
|
||||||
'timestamp': self.timestamp,
|
'timestamp': self.timestamp,
|
||||||
'config': self.config,
|
'config': self.config,
|
||||||
|
'item_keys': tuple(self.item_keys),
|
||||||
}))
|
}))
|
||||||
self.id = self.key.id_hash(data)
|
self.id = self.key.id_hash(data)
|
||||||
self.repository.put(self.MANIFEST_ID, self.key.encrypt(data))
|
self.repository.put(self.MANIFEST_ID, self.key.encrypt(data))
|
||||||
|
|
|
@ -68,7 +68,7 @@ class RobustUnpackerTestCase(BaseTestCase):
|
||||||
return isinstance(value, dict) and value.get(b'path') in (b'foo', b'bar', b'boo', b'baz')
|
return isinstance(value, dict) and value.get(b'path') in (b'foo', b'bar', b'boo', b'baz')
|
||||||
|
|
||||||
def process(self, input):
|
def process(self, input):
|
||||||
unpacker = RobustUnpacker(validator=self._validator)
|
unpacker = RobustUnpacker(validator=self._validator, item_keys=ITEM_KEYS)
|
||||||
result = []
|
result = []
|
||||||
for should_sync, chunks in input:
|
for should_sync, chunks in input:
|
||||||
if should_sync:
|
if should_sync:
|
||||||
|
|
Loading…
Add table
Reference in a new issue