mirror of
https://github.com/borgbackup/borg.git
synced 2024-12-26 17:57:59 +00:00
Merge branch '1.0-maint'
Also: add missing keys to ARCHIVE_KEYS set.
This commit is contained in:
commit
9a64835b4d
6 changed files with 142 additions and 32 deletions
|
@ -38,4 +38,4 @@ Software
|
|||
|
||||
- `BorgWeb - a very simple web UI for BorgBackup <https://borgweb.readthedocs.io/>`_
|
||||
- some other stuff found at the `BorgBackup GitHub organisation <https://github.com/borgbackup/>`_
|
||||
- `atticmatic <https://github.com/witten/atticmatic/>`_ (includes borgmatic)
|
||||
- `borgmatic <https://torsion.org/borgmatic/>`_ - simple wrapper script for BorgBackup that creates and prunes backups
|
||||
|
|
|
@ -690,12 +690,40 @@ def _open_rb(path):
|
|||
return os.open(path, flags_normal)
|
||||
|
||||
|
||||
def valid_msgpacked_dict(d, keys_serialized):
    """Cheap pre-check: does the data <d> look like a msgpacked dict?

    Only the leading bytes of <d> are inspected, nothing is unpacked.

    :param d: the raw data to examine (indexing must yield ints, as with bytes)
    :param keys_serialized: iterable of serialized key names; the first dict key
                            in <d> must start with one of them
    :return: True if <d> starts with a msgpack map header followed by a
             bytestring key matching one of <keys_serialized>, else False
    """
    if len(d) == 0:
        return False

    # Where the first key starts depends on the kind of map header.
    map_marker = d[0]
    if map_marker & 0xf0 == 0x80:
        # fixmap (up to 15 elements): the count lives in the marker byte itself
        key_pos = 1
    elif map_marker == 0xde:
        # map16 (up to 2^16-1 elements): marker byte + 2 bytes element count
        key_pos = 3
    else:
        # not a map (dict) at all.
        # note: we must not have dicts with > 2^16-1 elements
        return False

    if len(d) <= key_pos:
        # there is no data after the map header, so there is no first key
        return False

    # The first dict key must be a bytestring: either a small one (fixstr,
    # up to 31 chars) or a str8 / str16 / str32.
    key_marker = d[key_pos]
    if not (key_marker & 0xe0 == 0xa0 or key_marker in (0xd9, 0xda, 0xdb)):
        return False

    # Finally, the serialized key must be one of the expected key names.
    remainder = d[key_pos:]
    for pattern in keys_serialized:
        if remainder.startswith(pattern):
            return True
    return False
|
||||
|
||||
|
||||
class RobustUnpacker:
|
||||
"""A restartable/robust version of the streaming msgpack unpacker
|
||||
"""
|
||||
def __init__(self, validator):
|
||||
def __init__(self, validator, item_keys):
|
||||
super().__init__()
|
||||
self.item_keys = [msgpack.packb(name.encode()) for name in ITEM_KEYS]
|
||||
self.item_keys = [msgpack.packb(name.encode()) for name in item_keys]
|
||||
self.validator = validator
|
||||
self._buffered_data = []
|
||||
self._resync = False
|
||||
|
@ -720,18 +748,10 @@ def __next__(self):
|
|||
while self._resync:
|
||||
if not data:
|
||||
raise StopIteration
|
||||
# Abort early if the data does not look like a serialized dict
|
||||
if len(data) < 2 or ((data[0] & 0xf0) != 0x80) or ((data[1] & 0xe0) != 0xa0):
|
||||
# Abort early if the data does not look like a serialized item dict
|
||||
if not valid_msgpacked_dict(data, self.item_keys):
|
||||
data = data[1:]
|
||||
continue
|
||||
# Make sure it looks like an item dict
|
||||
for pattern in self.item_keys:
|
||||
if data[1:].startswith(pattern):
|
||||
break
|
||||
else:
|
||||
data = data[1:]
|
||||
continue
|
||||
|
||||
self._unpacker = msgpack.Unpacker(object_hook=StableDict)
|
||||
self._unpacker.feed(data)
|
||||
try:
|
||||
|
@ -806,7 +826,12 @@ def init_chunks(self):
|
|||
self.chunks[id_] = init_entry
|
||||
|
||||
def identify_key(self, repository):
|
||||
cdata = repository.get(next(self.chunks.iteritems())[0])
|
||||
try:
|
||||
some_chunkid, _ = next(self.chunks.iteritems())
|
||||
except StopIteration:
|
||||
# repo is completely empty, no chunks
|
||||
return None
|
||||
cdata = repository.get(some_chunkid)
|
||||
return key_factory(repository, cdata)
|
||||
|
||||
def verify_data(self):
|
||||
|
@ -834,13 +859,26 @@ def rebuild_manifest(self):
|
|||
|
||||
Iterates through all objects in the repository looking for archive metadata blocks.
|
||||
"""
|
||||
required_archive_keys = frozenset(key.encode() for key in REQUIRED_ARCHIVE_KEYS)
|
||||
|
||||
def valid_archive(obj):
|
||||
if not isinstance(obj, dict):
|
||||
return False
|
||||
keys = set(obj)
|
||||
return required_archive_keys.issubset(keys)
|
||||
|
||||
logger.info('Rebuilding missing manifest, this might take some time...')
|
||||
# as we have lost the manifest, we do not know any more what valid item keys we had.
|
||||
# collecting any key we encounter in a damaged repo seems unwise, thus we just use
|
||||
# the hardcoded list from the source code. thus, it is not recommended to rebuild a
|
||||
# lost manifest on an older borg version than the most recent one that was ever used
|
||||
# within this repository (assuming that newer borg versions support more item keys).
|
||||
manifest = Manifest(self.key, self.repository)
|
||||
archive_keys_serialized = [msgpack.packb(name.encode()) for name in ARCHIVE_KEYS]
|
||||
for chunk_id, _ in self.chunks.iteritems():
|
||||
cdata = self.repository.get(chunk_id)
|
||||
_, data = self.key.decrypt(chunk_id, cdata)
|
||||
# Some basic sanity checks of the payload before feeding it into msgpack
|
||||
if len(data) < 2 or ((data[0] & 0xf0) != 0x80) or ((data[1] & 0xe0) != 0xa0):
|
||||
if not valid_msgpacked_dict(data, archive_keys_serialized):
|
||||
continue
|
||||
if b'cmdline' not in data or b'\xa7version\x01' not in data:
|
||||
continue
|
||||
|
@ -850,7 +888,7 @@ def rebuild_manifest(self):
|
|||
# msgpack with invalid data
|
||||
except (TypeError, ValueError, StopIteration):
|
||||
continue
|
||||
if isinstance(archive, dict) and b'items' in archive and b'cmdline' in archive:
|
||||
if valid_archive(archive):
|
||||
logger.info('Found archive %s', archive[b'name'].decode('utf-8'))
|
||||
manifest.archives[archive[b'name'].decode('utf-8')] = {b'id': chunk_id, b'time': archive[b'time']}
|
||||
logger.info('Manifest rebuild complete.')
|
||||
|
@ -912,7 +950,10 @@ def robust_iterator(archive):
|
|||
|
||||
Missing item chunks will be skipped and the msgpack stream will be restarted
|
||||
"""
|
||||
unpacker = RobustUnpacker(lambda item: isinstance(item, dict) and 'path' in item)
|
||||
item_keys = frozenset(key.encode() for key in self.manifest.item_keys)
|
||||
required_item_keys = frozenset(key.encode() for key in REQUIRED_ITEM_KEYS)
|
||||
unpacker = RobustUnpacker(lambda item: isinstance(item, dict) and 'path' in item,
|
||||
self.manifest.item_keys)
|
||||
_state = 0
|
||||
|
||||
def missing_chunk_detector(chunk_id):
|
||||
|
@ -927,6 +968,12 @@ def report(msg, chunk_id, chunk_no):
|
|||
self.error_found = True
|
||||
logger.error(msg)
|
||||
|
||||
def valid_item(obj):
|
||||
if not isinstance(obj, StableDict):
|
||||
return False
|
||||
keys = set(obj)
|
||||
return required_item_keys.issubset(keys) and keys.issubset(item_keys)
|
||||
|
||||
i = 0
|
||||
for state, items in groupby(archive[b'items'], missing_chunk_detector):
|
||||
items = list(items)
|
||||
|
@ -942,7 +989,7 @@ def report(msg, chunk_id, chunk_no):
|
|||
unpacker.feed(data)
|
||||
try:
|
||||
for item in unpacker:
|
||||
if isinstance(item, dict):
|
||||
if valid_item(item):
|
||||
yield Item(internal_dict=item)
|
||||
else:
|
||||
report('Did not get expected metadata dict when unpacking item metadata', chunk_id, i)
|
||||
|
|
|
@ -26,7 +26,8 @@
|
|||
from .archive import Archive, ArchiveChecker, ArchiveRecreater, Statistics
|
||||
from .cache import Cache
|
||||
from .constants import * # NOQA
|
||||
from .helpers import Error
|
||||
from .helpers import EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR
|
||||
from .helpers import Error, NoManifestError
|
||||
from .helpers import location_validator, archivename_validator, ChunkerParams, CompressionSpec
|
||||
from .helpers import ItemFormatter, format_time, format_file_size, format_archive
|
||||
from .helpers import safe_encode, remove_surrogates, bin_to_hex
|
||||
|
@ -665,10 +666,11 @@ def do_rename(self, args, repository, manifest, key, cache, archive):
|
|||
cache.commit()
|
||||
return self.exit_code
|
||||
|
||||
@with_repository(exclusive=True)
|
||||
def do_delete(self, args, repository, manifest, key):
|
||||
@with_repository(exclusive=True, manifest=False)
|
||||
def do_delete(self, args, repository):
|
||||
"""Delete an existing repository or archive"""
|
||||
if args.location.archive:
|
||||
manifest, key = Manifest.load(repository)
|
||||
with Cache(repository, key, manifest, lock_wait=self.lock_wait) as cache:
|
||||
archive = Archive(repository, key, manifest, args.location.archive, cache=cache)
|
||||
stats = Statistics()
|
||||
|
@ -685,6 +687,12 @@ def do_delete(self, args, repository, manifest, key):
|
|||
else:
|
||||
if not args.cache_only:
|
||||
msg = []
|
||||
try:
|
||||
manifest, key = Manifest.load(repository)
|
||||
except NoManifestError:
|
||||
msg.append("You requested to completely DELETE the repository *including* all archives it may contain.")
|
||||
msg.append("This repository seems to have no manifest, so we can't tell anything about its contents.")
|
||||
else:
|
||||
msg.append("You requested to completely DELETE the repository *including* all archives it contains:")
|
||||
for archive_info in manifest.list_archive_infos(sort_by='ts'):
|
||||
msg.append(format_archive(archive_info))
|
||||
|
|
|
@ -1,8 +1,19 @@
|
|||
# this set must be kept complete, otherwise the RobustUnpacker might malfunction:
|
||||
ITEM_KEYS = set(['path', 'source', 'rdev', 'chunks', 'hardlink_master',
|
||||
ITEM_KEYS = frozenset(['path', 'source', 'rdev', 'chunks', 'hardlink_master',
|
||||
'mode', 'user', 'group', 'uid', 'gid', 'mtime', 'atime', 'ctime',
|
||||
'xattrs', 'bsdflags', 'acl_nfs4', 'acl_access', 'acl_default', 'acl_extended', ])
|
||||
|
||||
# this is the set of keys that are always present in items:
|
||||
REQUIRED_ITEM_KEYS = frozenset(['path', 'mtime', ])
|
||||
|
||||
# this set must be kept complete, otherwise rebuild_manifest might malfunction:
|
||||
ARCHIVE_KEYS = frozenset(['version', 'name', 'items', 'cmdline', 'hostname', 'username', 'time', 'time_end',
|
||||
'comment', 'chunker_params',
|
||||
'recreate_cmdline', 'recreate_source_id', 'recreate_args'])
|
||||
|
||||
# this is the set of keys that are always present in archives:
|
||||
REQUIRED_ARCHIVE_KEYS = frozenset(['version', 'name', 'items', 'cmdline', 'time', ])
|
||||
|
||||
ARCHIVE_TEXT_KEYS = (b'name', b'comment', b'hostname', b'username', b'time', b'time_end')
|
||||
|
||||
# default umask, overridden by --umask, defaults to read/write only for owner
|
||||
|
|
|
@ -84,6 +84,10 @@ class ExtensionModuleError(Error):
|
|||
"""The Borg binary extension modules do not seem to be properly installed"""
|
||||
|
||||
|
||||
class NoManifestError(Error):
|
||||
"""Repository has no manifest."""
|
||||
|
||||
|
||||
def check_extension_modules():
|
||||
from . import platform
|
||||
if hashindex.API_VERSION != 2:
|
||||
|
@ -100,11 +104,12 @@ class Manifest:
|
|||
|
||||
MANIFEST_ID = b'\0' * 32
|
||||
|
||||
def __init__(self, key, repository):
|
||||
def __init__(self, key, repository, item_keys=None):
|
||||
self.archives = {}
|
||||
self.config = {}
|
||||
self.key = key
|
||||
self.repository = repository
|
||||
self.item_keys = frozenset(item_keys) if item_keys is not None else ITEM_KEYS
|
||||
|
||||
@property
|
||||
def id_str(self):
|
||||
|
@ -113,7 +118,11 @@ def id_str(self):
|
|||
@classmethod
|
||||
def load(cls, repository, key=None):
|
||||
from .key import key_factory
|
||||
from .repository import Repository
|
||||
try:
|
||||
cdata = repository.get(cls.MANIFEST_ID)
|
||||
except Repository.ObjectNotFound:
|
||||
raise NoManifestError
|
||||
if not key:
|
||||
key = key_factory(repository, cdata)
|
||||
manifest = cls(key, repository)
|
||||
|
@ -127,6 +136,8 @@ def load(cls, repository, key=None):
|
|||
if manifest.timestamp:
|
||||
manifest.timestamp = manifest.timestamp.decode('ascii')
|
||||
manifest.config = m[b'config']
|
||||
# valid item keys are whatever is known in the repo or every key we know
|
||||
manifest.item_keys = ITEM_KEYS | frozenset(key.decode() for key in m.get(b'item_keys', []))
|
||||
return manifest, key
|
||||
|
||||
def write(self):
|
||||
|
@ -136,6 +147,7 @@ def write(self):
|
|||
'archives': self.archives,
|
||||
'timestamp': self.timestamp,
|
||||
'config': self.config,
|
||||
'item_keys': tuple(self.item_keys),
|
||||
}))
|
||||
self.id = self.key.id_hash(data)
|
||||
self.repository.put(self.MANIFEST_ID, self.key.encrypt(Chunk(data)))
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
import pytest
|
||||
import msgpack
|
||||
|
||||
from ..archive import Archive, CacheChunkBuffer, RobustUnpacker, Statistics
|
||||
from ..archive import Archive, CacheChunkBuffer, RobustUnpacker, valid_msgpacked_dict, ITEM_KEYS, Statistics
|
||||
from ..item import Item
|
||||
from ..key import PlaintextKey
|
||||
from ..helpers import Manifest
|
||||
|
@ -139,7 +139,7 @@ def _validator(self, value):
|
|||
return isinstance(value, dict) and value.get(b'path') in (b'foo', b'bar', b'boo', b'baz')
|
||||
|
||||
def process(self, input):
|
||||
unpacker = RobustUnpacker(validator=self._validator)
|
||||
unpacker = RobustUnpacker(validator=self._validator, item_keys=ITEM_KEYS)
|
||||
result = []
|
||||
for should_sync, chunks in input:
|
||||
if should_sync:
|
||||
|
@ -184,3 +184,35 @@ def test_corrupt_chunk(self):
|
|||
input = [(False, chunks[:3]), (True, [b'gar', b'bage'] + chunks[3:])]
|
||||
result = self.process(input)
|
||||
self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'boo'}, {b'path': b'baz'}])
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def item_keys_serialized():
|
||||
return [msgpack.packb(name) for name in ITEM_KEYS]
|
||||
|
||||
|
||||
@pytest.mark.parametrize('packed',
|
||||
[b'', b'x', b'foobar', ] +
|
||||
[msgpack.packb(o) for o in (
|
||||
[None, 0, 0.0, False, '', {}, [], ()] +
|
||||
[42, 23.42, True, b'foobar', {b'foo': b'bar'}, [b'foo', b'bar'], (b'foo', b'bar')]
|
||||
)])
|
||||
def test_invalid_msgpacked_item(packed, item_keys_serialized):
|
||||
assert not valid_msgpacked_dict(packed, item_keys_serialized)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('packed',
|
||||
[msgpack.packb(o) for o in [
|
||||
{b'path': b'/a/b/c'}, # small (different msgpack mapping type!)
|
||||
dict((k, b'') for k in ITEM_KEYS), # as big (key count) as it gets
|
||||
dict((k, b'x' * 1000) for k in ITEM_KEYS), # as big (key count and volume) as it gets
|
||||
]])
|
||||
def test_valid_msgpacked_items(packed, item_keys_serialized):
|
||||
assert valid_msgpacked_dict(packed, item_keys_serialized)
|
||||
|
||||
|
||||
def test_key_length_msgpacked_items():
|
||||
key = b'x' * 32 # 31 bytes is the limit for fixstr msgpack type
|
||||
data = {key: b''}
|
||||
item_keys_serialized = [msgpack.packb(key), ]
|
||||
assert valid_msgpacked_dict(msgpack.packb(data), item_keys_serialized)
|
||||
|
|
Loading…
Reference in a new issue