From 7501c3b530a2069e9bc450487753644330e47a3a Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 5 Jun 2016 01:05:32 +0200 Subject: [PATCH 01/10] better error handling for missing repo manifest, fixes #1043
this can happen for incorrectly initialized or corrupted repos. here: make borg list fail more gracefully.
--- borg/helpers.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/borg/helpers.py b/borg/helpers.py index 27697cade..aefcdffc5 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -65,6 +65,10 @@ class ExtensionModuleError(Error): """The Borg binary extension modules do not seem to be properly installed""" +class NoManifestError(Error): + """Repository has no manifest.""" + + def check_extension_modules(): from . import platform if hashindex.API_VERSION != 2: @@ -90,7 +94,11 @@ class Manifest: @classmethod def load(cls, repository, key=None): from .key import key_factory - cdata = repository.get(cls.MANIFEST_ID) + from .repository import Repository + try: + cdata = repository.get(cls.MANIFEST_ID) + except Repository.ObjectNotFound: + raise NoManifestError if not key: key = key_factory(repository, cdata) manifest = cls(key, repository)
From 6a70d9968c3dfd904e678814f55a9da2206d4af0 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 5 Jun 2016 01:22:43 +0200 Subject: [PATCH 02/10] make borg check work for empty repo
--- borg/archive.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/borg/archive.py b/borg/archive.py index b10bac07a..e21c1aab5 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -695,7 +695,12 @@ class ArchiveChecker: self.chunks[id_] = (0, 0, 0) def identify_key(self, repository): - cdata = repository.get(next(self.chunks.iteritems())[0]) + try: + some_chunkid, _ = next(self.chunks.iteritems()) + except StopIteration: + # repo is completely empty, no chunks + return None + cdata = repository.get(some_chunkid) return key_factory(repository, cdata) def rebuild_manifest(self):
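With patch 01, a missing manifest surfaces as a dedicated NoManifestError instead of a low-level Repository.ObjectNotFound, and patch 02 lets borg check survive a completely empty repo. A minimal caller-side sketch of what the new exception enables (hypothetical usage, not part of the patches; describe_repo is an invented name):

from borg.helpers import Manifest, NoManifestError

def describe_repo(repository):
    try:
        manifest, key = Manifest.load(repository)
    except NoManifestError:
        # uninitialized or corrupted repo: report it gracefully instead of
        # letting a repository-level exception traceback bubble up
        return 'repository has no manifest'
    return 'repository contains %d archive(s)' % len(manifest.archives)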
From e10d543ef4a52ea1d3dfd23799f0e73f952f9888 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 5 Jun 2016 02:14:14 +0200 Subject: [PATCH 03/10] delete a repo without manifest
--- borg/archiver.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-)
diff --git a/borg/archiver.py b/borg/archiver.py index fb1fa523e..bbd6a2e43 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -19,7 +19,7 @@ from . import __version__ from .helpers import Error, location_validator, archivename_validator, format_line, format_time, format_file_size, \ parse_pattern, PathPrefixPattern, to_localtime, timestamp, safe_timestamp, \ get_cache_dir, prune_within, prune_split, \ - Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \ + Manifest, NoManifestError, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \ dir_is_tagged, bigint_to_int, ChunkerParams, CompressionSpec, is_slow_msgpack, yes, sysinfo, \ EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR, log_multi, PatternMatcher from .logger import create_logger, setup_logging @@ -397,10 +397,11 @@ class Archiver: cache.commit() return self.exit_code - @with_repository(exclusive=True) - def do_delete(self, args, repository, manifest, key): + @with_repository(exclusive=True, manifest=False) + def do_delete(self, args, repository): """Delete an existing repository or archive""" if args.location.archive: + manifest, key = Manifest.load(repository) with Cache(repository, key, manifest, lock_wait=self.lock_wait) as cache: archive = Archive(repository, key, manifest, args.location.archive, cache=cache) stats = Statistics() @@ -417,9 +418,15 @@ class Archiver: else: if not args.cache_only: msg = [] - msg.append("You requested to completely DELETE the repository *including* all archives it contains:") - for archive_info in manifest.list_archive_infos(sort_by='ts'): - msg.append(format_archive(archive_info)) + try: + manifest, key = Manifest.load(repository) + except NoManifestError: + msg.append("You requested to completely DELETE the repository *including* all archives it may contain.") + msg.append("This repository seems to have no manifest, so we can't tell anything about its contents.") + else: + msg.append("You requested to completely DELETE the repository *including* all archives it contains:") + for archive_info in manifest.list_archive_infos(sort_by='ts'): + msg.append(format_archive(archive_info)) msg.append("Type 'YES' if you understand this and want to continue: ") msg = '\n'.join(msg) if not yes(msg, false_msg="Aborting.", truish=('YES', ),
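The next patch reasons about raw msgpack type bytes, so a short orientation helps. The snippet below is an illustration, not part of the patches; it assumes the msgpack library, with use_bin_type=False selecting the old raw/str format that borg 1.0 uses for bytestrings:

import msgpack

packed = msgpack.packb({b'path': b'x'}, use_bin_type=False)
assert packed[0] == 0x81   # fixmap with 1 entry: 0x80 | count, for up to 15 entries
assert packed[1] == 0xa4   # fixstr key of length 4: 0xa0 | len, for up to 31 bytes

big = msgpack.packb(dict.fromkeys(range(16), 0))
assert big[0] == 0xde      # 16 entries no longer fit a fixmap: map16

long_key = msgpack.packb(b'x' * 32, use_bin_type=False)
assert long_key[0] == 0xda # keys longer than 31 bytes leave fixstr territory (str16 here)

Note that a dict holding all 18 ITEM_KEYS exceeds the 15-entry fixmap limit and packs as a map16, while a one-key item dict packs as a fixmap - that contrast is what the "(different msgpack mapping type!)" comment in the tests below refers to.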
From 918e0b2a5290cddc25988aa19da27681fa60d57f Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 9 Jun 2016 21:08:39 +0200 Subject: [PATCH 04/10] fix resync and msgpacked item qualifier, fixes #1135
when trying to resync and skip invalid data, borg tries to qualify a byte sequence as a valid-looking msgpacked item metadata dict (or not) before even invoking msgpack's unpack. besides the previously hard-to-understand code, there were 2 issues: - a missing check for map16 - this type is what msgpack uses if the dict has more than 15 items (could happen in the future, not for 1.0.x). - missing checks for str8/16/32 - str16 is what msgpack uses if the bytestring has more than 31 bytes (borg does not have key names that long, so this wasn't causing any harm). this misqualification (valid data considered invalid) could lead to a wrong resync, skipping valid items. added more comments and tests.
--- borg/archive.py | 40 +++++++++++++++++++++++++++++---------- borg/testsuite/archive.py | 35 +++++++++++++++++++++++++++++++++- 2 files changed, 64 insertions(+), 11 deletions(-)
diff --git a/borg/archive.py b/borg/archive.py index b10bac07a..93d90fc9c 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -592,6 +592,34 @@ ITEM_KEYS = set([b'path', b'source', b'rdev', b'chunks', b'xattrs', b'bsdflags', b'acl_nfs4', b'acl_access', b'acl_default', b'acl_extended', ]) +def valid_msgpacked_item(d, item_keys_serialized): + """check if the data looks like a msgpacked item metadata dict""" + d_len = len(d) + if d_len == 0: + return False + if d[0] & 0xf0 == 0x80: # object is a fixmap (up to 15 elements) + offs = 1 + elif d[0] == 0xde: # object is a map16 (up to 2^16-1 elements) + offs = 3 + else: + # object is not a map (dict) + # note: we must not have item dicts with > 2^16-1 elements + return False + if d_len <= offs: + return False + # is the first dict key a bytestring? + if d[offs] & 0xe0 == 0xa0: # key is a small bytestring (up to 31 chars) + pass + elif d[offs] in (0xd9, 0xda, 0xdb): # key is a str8, str16 or str32 + pass + else: + # key is not a bytestring + return False + # is the bytestring any of the expected key names? + key_serialized = d[offs:] + return any(key_serialized.startswith(pattern) for pattern in item_keys_serialized) + + class RobustUnpacker: """A restartable/robust version of the streaming msgpack unpacker """ @@ -622,18 +650,10 @@ class RobustUnpacker: while self._resync: if not data: raise StopIteration - # Abort early if the data does not look like a serialized dict - if len(data) < 2 or ((data[0] & 0xf0) != 0x80) or ((data[1] & 0xe0) != 0xa0): + # Abort early if the data does not look like a serialized item dict + if not valid_msgpacked_item(data, self.item_keys): data = data[1:] continue - # Make sure it looks like an item dict - for pattern in self.item_keys: - if data[1:].startswith(pattern): - break - else: - data = data[1:] - continue - self._unpacker = msgpack.Unpacker(object_hook=StableDict) self._unpacker.feed(data) try:
diff --git a/borg/testsuite/archive.py b/borg/testsuite/archive.py index 662d776c9..78ea25465 100644 --- a/borg/testsuite/archive.py +++ b/borg/testsuite/archive.py @@ -2,8 +2,9 @@ from datetime import datetime, timezone from unittest.mock import Mock import msgpack +import pytest -from ..archive import Archive, CacheChunkBuffer, RobustUnpacker +from ..archive import Archive, CacheChunkBuffer, RobustUnpacker, valid_msgpacked_item, ITEM_KEYS from ..key import PlaintextKey from ..helpers import Manifest from . import BaseTestCase @@ -112,3 +113,35 @@ class RobustUnpackerTestCase(BaseTestCase): input = [(False, chunks[:3]), (True, [b'gar', b'bage'] + chunks[3:])] result = self.process(input) self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'boo'}, {b'path': b'baz'}]) + + +@pytest.fixture +def item_keys_serialized(): + return [msgpack.packb(name) for name in ITEM_KEYS] + + +@pytest.mark.parametrize('packed', + [b'', b'x', b'foobar', ] + + [msgpack.packb(o) for o in ( + [None, 0, 0.0, False, '', {}, [], ()] + + [42, 23.42, True, b'foobar', {b'foo': b'bar'}, [b'foo', b'bar'], (b'foo', b'bar')] + )]) +def test_invalid_msgpacked_item(packed, item_keys_serialized): + assert not valid_msgpacked_item(packed, item_keys_serialized) + + +@pytest.mark.parametrize('packed', + [msgpack.packb(o) for o in [ + {b'path': b'/a/b/c'}, # small (different msgpack mapping type!)
+ dict((k, b'') for k in ITEM_KEYS), # as big (key count) as it gets + dict((k, b'x' * 1000) for k in ITEM_KEYS), # as big (key count and volume) as it gets + ]]) +def test_valid_msgpacked_items(packed, item_keys_serialized): + assert valid_msgpacked_item(packed, item_keys_serialized) + + +def test_key_length_msgpacked_items(): + key = b'x' * 32 # 31 bytes is the limit for fixstr msgpack type + data = {key: b''} + item_keys_serialized = [msgpack.packb(key), ] + assert valid_msgpacked_item(msgpack.packb(data), item_keys_serialized)
From 7642632359aa724566670838082fa30ed45cf62d Mon Sep 17 00:00:00 2001 From: Dan Helfman Date: Fri, 10 Jun 2016 19:27:19 -0700 Subject: [PATCH 05/10] Update resources.rst to rename atticmatic to borgmatic
atticmatic wrapper script has been renamed to borgmatic!
--- docs/resources.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/resources.rst b/docs/resources.rst index 59fa0310a..4ae184946 100644 --- a/docs/resources.rst +++ b/docs/resources.rst @@ -38,4 +38,4 @@ Software - `BorgWeb - a very simple web UI for BorgBackup `_ - some other stuff found at the `BorgBackup Github organisation `_ -- `atticmatic `_ (includes borgmatic) +- `borgmatic `_ - simple wrapper script for BorgBackup that creates and prunes backups
From 78121a8d04383df4c7d478a76b4716f196939a8a Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 9 Jun 2016 23:38:12 +0200 Subject: [PATCH 06/10] store item_keys into manifest, fixes #1147
we need a list of valid item metadata keys. using a list stored in the repo manifest is more future-proof than the hardcoded ITEM_KEYS in the source code. keys that are in union(item_keys_from_repo, item_keys_from_source) are considered valid.
--- borg/archive.py | 20 +++++++++++++------- borg/helpers.py | 8 +++++++- borg/testsuite/archive.py | 2 +- 3 files changed, 21 insertions(+), 9 deletions(-)
diff --git a/borg/archive.py b/borg/archive.py index 5a5d6125c..8638bc22a 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -224,7 +224,7 @@ Number of files: {0.stats.nfiles}'''.format( yield item def add_item(self, item): - unknown_keys = set(item) - ITEM_KEYS + unknown_keys = set(item) - self.manifest.item_keys assert not unknown_keys, ('unknown item metadata keys detected, please update ITEM_KEYS: %s', ','.join(k.decode('ascii') for k in unknown_keys)) if self.show_progress: @@ -587,9 +587,9 @@ Number of files: {0.stats.nfiles}'''.format( # this set must be kept complete, otherwise the RobustUnpacker might malfunction: -ITEM_KEYS = set([b'path', b'source', b'rdev', b'chunks', - b'mode', b'user', b'group', b'uid', b'gid', b'mtime', b'atime', b'ctime', - b'xattrs', b'bsdflags', b'acl_nfs4', b'acl_access', b'acl_default', b'acl_extended', ]) +ITEM_KEYS = frozenset([b'path', b'source', b'rdev', b'chunks', + b'mode', b'user', b'group', b'uid', b'gid', b'mtime', b'atime', b'ctime', + b'xattrs', b'bsdflags', b'acl_nfs4', b'acl_access', b'acl_default', b'acl_extended', ]) def valid_msgpacked_item(d, item_keys_serialized): @@ -623,9 +623,9 @@ class RobustUnpacker: """A restartable/robust version of the streaming msgpack unpacker """ - def __init__(self, validator): + def __init__(self, validator, item_keys): super().__init__() - self.item_keys = [msgpack.packb(name) for name in ITEM_KEYS] + self.item_keys = [msgpack.packb(name) for name in item_keys] self.validator = validator self._buffered_data = [] self._resync = False @@ -729,6 +729,11 @@ class ArchiveChecker: Iterates through all objects in the repository looking for archive metadata blocks. """ logger.info('Rebuilding missing manifest, this might take some time...') + # as we have lost the manifest, we do not know any more what valid item keys we had. + # collecting any key we encounter in a damaged repo seems unwise, thus we just use + # the hardcoded list from the source code. thus, it is not recommended to rebuild a + # lost manifest on an older borg version than the most recent one that was ever used + # within this repository (assuming that newer borg versions support more item keys). manifest = Manifest(self.key, self.repository) for chunk_id, _ in self.chunks.iteritems(): cdata = self.repository.get(chunk_id) @@ -806,7 +811,8 @@ class ArchiveChecker: Missing item chunks will be skipped and the msgpack stream will be restarted """ - unpacker = RobustUnpacker(lambda item: isinstance(item, dict) and b'path' in item) + unpacker = RobustUnpacker(lambda item: isinstance(item, dict) and b'path' in item, + self.manifest.item_keys) _state = 0 def missing_chunk_detector(chunk_id):
diff --git a/borg/helpers.py b/borg/helpers.py index aefcdffc5..e9b8d9a3a 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -85,16 +85,19 @@ class Manifest: MANIFEST_ID = b'\0' * 32 - def __init__(self, key, repository): + def __init__(self, key, repository, item_keys=None): + from .archive import ITEM_KEYS self.archives = {} self.config = {} self.key = key self.repository = repository + self.item_keys = frozenset(item_keys) if item_keys is not None else ITEM_KEYS @classmethod def load(cls, repository, key=None): from .key import key_factory from .repository import Repository + from .archive import ITEM_KEYS try: cdata = repository.get(cls.MANIFEST_ID) except Repository.ObjectNotFound: @@ -112,6 +115,8 @@ class Manifest: if manifest.timestamp: manifest.timestamp = manifest.timestamp.decode('ascii') manifest.config = m[b'config'] + # valid item keys are whatever is known in the repo or every key we know + manifest.item_keys = frozenset(m.get(b'item_keys', [])) | ITEM_KEYS return manifest, key def write(self): @@ -121,6 +126,7 @@ class Manifest: 'archives': self.archives, 'timestamp': self.timestamp, 'config': self.config, + 'item_keys': tuple(self.item_keys), })) self.id = self.key.id_hash(data) self.repository.put(self.MANIFEST_ID, self.key.encrypt(data))
diff --git a/borg/testsuite/archive.py b/borg/testsuite/archive.py index 78ea25465..66676f334 100644 --- a/borg/testsuite/archive.py +++ b/borg/testsuite/archive.py @@ -68,7 +68,7 @@ class RobustUnpackerTestCase(BaseTestCase): return isinstance(value, dict) and value.get(b'path') in (b'foo', b'bar', b'boo', b'baz') def process(self, input): - unpacker = RobustUnpacker(validator=self._validator) + unpacker = RobustUnpacker(validator=self._validator, item_keys=ITEM_KEYS) result = [] for should_sync, chunks in input: if should_sync:
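Patch 06's compatibility rule is worth spelling out: the set of valid item keys is the union of what the repo manifest recorded and what the running borg knows from its own source code. A standalone sketch of that behavior (key sets shortened here for illustration):

ITEM_KEYS = frozenset([b'path', b'mtime'])  # this version's hardcoded keys (shortened)
m = {b'item_keys': [b'path', b'mtime', b'key_from_newer_borg']}  # unpacked manifest dict

# manifests written by old borg have no b'item_keys' entry -> hardcoded set only
item_keys = frozenset(m.get(b'item_keys', [])) | ITEM_KEYS
assert b'key_from_newer_borg' in item_keys  # keys recorded by a newer borg stay valid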
From a7b516514949db068fbc8747a8cec076c3a70f69 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 12 Jun 2016 00:10:39 +0200 Subject: [PATCH 07/10] better validation of item metadata dicts, fixes #1130
the previous check only verified that we got a dict, but did not validate the dict keys. this triggered issues with e.g. (invalid) integer keys. now it validates the keys: - some required keys must be present - the set of keys is a subset of all valid keys
--- borg/archive.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-)
diff --git a/borg/archive.py b/borg/archive.py index 8638bc22a..370134671 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -591,6 +591,9 @@ ITEM_KEYS = frozenset([b'path', b'source', b'rdev', b'chunks', b'mode', b'user', b'group', b'uid', b'gid', b'mtime', b'atime', b'ctime', b'xattrs', b'bsdflags', b'acl_nfs4', b'acl_access', b'acl_default', b'acl_extended', ]) +# this is the set of keys that are always present in items: +REQUIRED_ITEM_KEYS = frozenset([b'path', b'mtime', ]) + def valid_msgpacked_item(d, item_keys_serialized): """check if the data looks like a msgpacked item metadata dict""" @@ -811,8 +814,8 @@ class ArchiveChecker: Missing item chunks will be skipped and the msgpack stream will be restarted """ - unpacker = RobustUnpacker(lambda item: isinstance(item, dict) and b'path' in item, - self.manifest.item_keys) + item_keys = self.manifest.item_keys + unpacker = RobustUnpacker(lambda item: isinstance(item, dict) and b'path' in item, item_keys) _state = 0 def missing_chunk_detector(chunk_id): @@ -827,6 +830,12 @@ class ArchiveChecker: self.error_found = True logger.error(msg) + def valid_item(obj): + if not isinstance(obj, StableDict): + return False + keys = set(obj) + return REQUIRED_ITEM_KEYS.issubset(keys) and keys.issubset(item_keys) + i = 0 for state, items in groupby(archive[b'items'], missing_chunk_detector): items = list(items) @@ -841,7 +850,7 @@ class ArchiveChecker: unpacker.feed(self.key.decrypt(chunk_id, cdata)) try: for item in unpacker: - if isinstance(item, dict): + if valid_item(item): yield item else: report('Did not get expected metadata dict when unpacking item metadata', chunk_id, i)
From 866417853d26a5b04b2b800cb66bb4541205cedd Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 12 Jun 2016 15:07:49 +0200 Subject: [PATCH 08/10] rename valid_msgpacked_item to valid_msgpacked_dict
the code is generic; it can also be used for other msgpacked dictionaries.
--- borg/archive.py | 10 +++++----- borg/testsuite/archive.py | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/borg/archive.py b/borg/archive.py index 370134671..b677f5f23 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -595,8 +595,8 @@ ITEM_KEYS = frozenset([b'path', b'source', b'rdev', b'chunks', REQUIRED_ITEM_KEYS = frozenset([b'path', b'mtime', ]) -def valid_msgpacked_item(d, item_keys_serialized): - """check if the data looks like a msgpacked item metadata dict""" +def valid_msgpacked_dict(d, keys_serialized): + """check if the data looks like a msgpacked dict""" d_len = len(d) if d_len == 0: return False @@ -606,7 +606,7 @@ offs = 3 else: # object is not a map (dict) - # note: we must not have item dicts with > 2^16-1 elements + # note: we must not have dicts with > 2^16-1 elements return False if d_len <= offs: return False @@ -620,7 +620,7 @@ return False # is the bytestring any of the expected key names?
key_serialized = d[offs:] - return any(key_serialized.startswith(pattern) for pattern in item_keys_serialized) + return any(key_serialized.startswith(pattern) for pattern in keys_serialized) class RobustUnpacker: @@ -654,7 +654,7 @@ class RobustUnpacker: if not data: raise StopIteration # Abort early if the data does not look like a serialized item dict - if not valid_msgpacked_item(data, self.item_keys): + if not valid_msgpacked_dict(data, self.item_keys): data = data[1:] continue self._unpacker = msgpack.Unpacker(object_hook=StableDict)
diff --git a/borg/testsuite/archive.py b/borg/testsuite/archive.py index 66676f334..919d57a0c 100644 --- a/borg/testsuite/archive.py +++ b/borg/testsuite/archive.py @@ -4,7 +4,7 @@ from unittest.mock import Mock import msgpack import pytest -from ..archive import Archive, CacheChunkBuffer, RobustUnpacker, valid_msgpacked_item, ITEM_KEYS +from ..archive import Archive, CacheChunkBuffer, RobustUnpacker, valid_msgpacked_dict, ITEM_KEYS from ..key import PlaintextKey from ..helpers import Manifest from . import BaseTestCase @@ -127,7 +127,7 @@ def item_keys_serialized(): [42, 23.42, True, b'foobar', {b'foo': b'bar'}, [b'foo', b'bar'], (b'foo', b'bar')] )]) def test_invalid_msgpacked_item(packed, item_keys_serialized): - assert not valid_msgpacked_item(packed, item_keys_serialized) + assert not valid_msgpacked_dict(packed, item_keys_serialized) @@ -137,11 +137,11 @@ def test_invalid_msgpacked_item(packed, item_keys_serialized): dict((k, b'') for k in ITEM_KEYS), # as big (key count) as it gets dict((k, b'x' * 1000) for k in ITEM_KEYS), # as big (key count and volume) as it gets ]]) def test_valid_msgpacked_items(packed, item_keys_serialized): - assert valid_msgpacked_item(packed, item_keys_serialized) + assert valid_msgpacked_dict(packed, item_keys_serialized) def test_key_length_msgpacked_items(): key = b'x' * 32 # 31 bytes is the limit for fixstr msgpack type data = {key: b''} item_keys_serialized = [msgpack.packb(key), ] - assert valid_msgpacked_item(msgpack.packb(data), item_keys_serialized) + assert valid_msgpacked_dict(msgpack.packb(data), item_keys_serialized)
From 03f6282eabbd796f2692ecd5b87812b53c39249c Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 12 Jun 2016 15:29:59 +0200 Subject: [PATCH 09/10] make rebuild_manifest more future-proof
--- borg/archive.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/borg/archive.py b/borg/archive.py index b677f5f23..3dedbe8ce 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -594,6 +594,9 @@ ITEM_KEYS = frozenset([b'path', b'source', b'rdev', b'chunks', # this is the set of keys that are always present in items: REQUIRED_ITEM_KEYS = frozenset([b'path', b'mtime', ]) +# this set must be kept complete, otherwise rebuild_manifest might malfunction: +ARCHIVE_KEYS = set([b'version', b'name', b'items', b'cmdline', b'hostname', b'username', b'time', b'time_end']) def valid_msgpacked_dict(d, keys_serialized): """check if the data looks like a msgpacked dict""" @@ -738,11 +741,11 @@ class ArchiveChecker: # lost manifest on an older borg version than the most recent one that was ever used # within this repository (assuming that newer borg versions support more item keys). manifest = Manifest(self.key, self.repository) + archive_keys_serialized = [msgpack.packb(name) for name in ARCHIVE_KEYS] for chunk_id, _ in self.chunks.iteritems(): cdata = self.repository.get(chunk_id) data = self.key.decrypt(chunk_id, cdata) - # Some basic sanity checks of the payload before feeding it into msgpack - if len(data) < 2 or ((data[0] & 0xf0) != 0x80) or ((data[1] & 0xe0) != 0xa0): + if not valid_msgpacked_dict(data, archive_keys_serialized): continue if b'cmdline' not in data or b'\xa7version\x01' not in data: continue
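A side note on the magic constant kept in the hunk above: b'\xa7version\x01' is simply the msgpack encoding of the key b'version' followed by the value 1 (0xa7 is a fixstr of length 7, 0x01 a positive fixint). A quick self-contained check (assuming the msgpack library, with use_bin_type=False for the old raw/str format):

import msgpack

assert msgpack.packb(b'version', use_bin_type=False) + msgpack.packb(1) == b'\xa7version\x01'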
manifest = Manifest(self.key, self.repository) + archive_keys_serialized = [msgpack.packb(name) for name in ARCHIVE_KEYS] for chunk_id, _ in self.chunks.iteritems(): cdata = self.repository.get(chunk_id) data = self.key.decrypt(chunk_id, cdata) - # Some basic sanity checks of the payload before feeding it into msgpack - if len(data) < 2 or ((data[0] & 0xf0) != 0x80) or ((data[1] & 0xe0) != 0xa0): + if not valid_msgpacked_dict(data, archive_keys_serialized): continue if b'cmdline' not in data or b'\xa7version\x01' not in data: continue From 69c3b5e196a2d7afec36fde4fea8166a4ff9b21d Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 12 Jun 2016 16:06:16 +0200 Subject: [PATCH 10/10] rebuild_manifest: refactor archive metadata dict validation this was already done in a similar way for item metadata dict validation. also: check for some more required keys - the old code would crash if 'name' or 'time' key were missing. --- borg/archive.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/borg/archive.py b/borg/archive.py index 3dedbe8ce..1894ad7a7 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -595,7 +595,10 @@ ITEM_KEYS = frozenset([b'path', b'source', b'rdev', b'chunks', REQUIRED_ITEM_KEYS = frozenset([b'path', b'mtime', ]) # this set must be kept complete, otherwise rebuild_manifest might malfunction: -ARCHIVE_KEYS = set([b'version', b'name', b'items', b'cmdline', b'hostname', b'username', b'time', b'time_end']) +ARCHIVE_KEYS = frozenset([b'version', b'name', b'items', b'cmdline', b'hostname', b'username', b'time', b'time_end', ]) + +# this is the set of keys that are always present in archives: +REQUIRED_ARCHIVE_KEYS = frozenset([b'version', b'name', b'items', b'cmdline', b'time', ]) def valid_msgpacked_dict(d, keys_serialized): @@ -734,6 +737,12 @@ class ArchiveChecker: Iterates through all objects in the repository looking for archive metadata blocks. """ + def valid_archive(obj): + if not isinstance(obj, dict): + return False + keys = set(obj) + return REQUIRED_ARCHIVE_KEYS.issubset(keys) + logger.info('Rebuilding missing manifest, this might take some time...') # as we have lost the manifest, we do not know any more what valid item keys we had. # collecting any key we encounter in a damaged repo seems unwise, thus we just use @@ -755,7 +764,7 @@ class ArchiveChecker: # msgpack with invalid data except (TypeError, ValueError, StopIteration): continue - if isinstance(archive, dict) and b'items' in archive and b'cmdline' in archive: + if valid_archive(archive): logger.info('Found archive %s', archive[b'name'].decode('utf-8')) manifest.archives[archive[b'name'].decode('utf-8')] = {b'id': chunk_id, b'time': archive[b'time']} logger.info('Manifest rebuild complete.')