diff --git a/.gitattributes b/.gitattributes index a97e72971..9d00a6907 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,3 @@ borg/_version.py export-subst + +*.py diff=python diff --git a/.travis/install.sh b/.travis/install.sh index 3faec02d1..6a2e9cb7c 100755 --- a/.travis/install.sh +++ b/.travis/install.sh @@ -18,7 +18,7 @@ if [[ "$(uname -s)" == 'Darwin' ]]; then brew install xz # required for python lzma module brew outdated pyenv || brew upgrade pyenv brew install pkg-config - brew install Caskroom/versions/osxfuse-beta + brew install Caskroom/versions/osxfuse case "${TOXENV}" in py34) diff --git a/src/borg/archive.py b/src/borg/archive.py index e831be225..0044bd53b 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -994,6 +994,9 @@ class ArchiveChecker: self.repair = repair self.repository = repository self.init_chunks() + if not self.chunks: + logger.error('Repository contains no apparent data at all, cannot continue check/repair.') + return False self.key = self.identify_key(repository) if verify_data: self.verify_data() @@ -1260,11 +1263,21 @@ class ArchiveChecker: self.error_found = True logger.error(msg) + def list_keys_safe(keys): + return ', '.join((k.decode() if isinstance(k, bytes) else str(k) for k in keys)) + def valid_item(obj): if not isinstance(obj, StableDict): - return False + return False, 'not a dictionary' + # A bug in Attic up to and including release 0.13 added a (meaningless) b'acl' key to every item. + # We ignore it here, should it exist. See test_attic013_acl_bug for details. 
+ obj.pop(b'acl', None) keys = set(obj) - return required_item_keys.issubset(keys) and keys.issubset(item_keys) + if not required_item_keys.issubset(keys): + return False, 'missing required keys: ' + list_keys_safe(required_item_keys - keys) + if not keys.issubset(item_keys): + return False, 'invalid keys: ' + list_keys_safe(keys - item_keys) + return True, '' i = 0 for state, items in groupby(archive.items, missing_chunk_detector): @@ -1281,10 +1294,11 @@ class ArchiveChecker: unpacker.feed(data) try: for item in unpacker: - if valid_item(item): + valid, reason = valid_item(item) + if valid: yield Item(internal_dict=item) else: - report('Did not get expected metadata dict when unpacking item metadata', chunk_id, i) + report('Did not get expected metadata dict when unpacking item metadata (%s)' % reason, chunk_id, i) except RobustUnpacker.UnpackerCrashed as err: report('Unpacker crashed while unpacking item metadata, trying to resync...', chunk_id, i) unpacker.resync() diff --git a/src/borg/archiver.py b/src/borg/archiver.py index 6f3fa61d1..f4d5bba39 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -2203,6 +2203,8 @@ class Archiver: considered for deletion and only those archives count towards the totals specified by the rules. Otherwise, *all* archives in the repository are candidates for deletion! + There is no automatic distinction between archives representing different + contents. These need to be distinguished by specifying matching prefixes. If you have multiple sequences of archives with different data sets (e.g. from different machines) in one shared repository, use one prune call per diff --git a/src/borg/item.py b/src/borg/item.py index 052478705..84d5085ec 100644 --- a/src/borg/item.py +++ b/src/borg/item.py @@ -16,6 +16,13 @@ class PropDict: - optionally, decode when getting a value - be safe against typos in key names: check against VALID_KEYS - when setting a value: check type of value + + When "packing" a dict, ie. 
you have a dict with some data and want to convert it into an instance, + then use eg. Item({'a': 1, ...}). This way all keys in your dictionary are validated. + + When "unpacking", that is, you've read a dictionary with some data from somewhere (eg msgpack), + then use eg. Item(internal_dict={...}). This does not validate the keys, therefore unknown keys + are ignored instead of causing an error. """ VALID_KEYS = None # override with in child class @@ -112,11 +119,14 @@ class Item(PropDict): Items are created either from msgpack unpacker output, from another dict, from kwargs or built step-by-step by setting attributes. - msgpack gives us a dict with bytes-typed keys, just give it to Item(d) and use item.key_name later. + msgpack gives us a dict with bytes-typed keys, just give it to Item(internal_dict=d) and use item.key_name later. msgpack gives us byte-typed values for stuff that should be str, we automatically decode when getting such a property and encode when setting it. If an Item shall be serialized, give as_dict() method output to msgpack packer. + + A bug in Attic up to and including release 0.13 added a (meaningless) 'acl' key to every item. + We must never re-use this key. See test_attic013_acl_bug for details. 
""" VALID_KEYS = ITEM_KEYS | {'deleted', 'nlink', } # str-typed keys diff --git a/src/borg/repository.py b/src/borg/repository.py index 08aea5351..7d1bf8294 100644 --- a/src/borg/repository.py +++ b/src/borg/repository.py @@ -683,6 +683,9 @@ class Repository: if transaction_id is None: logger.debug('No index transaction found, trying latest segment') transaction_id = self.io.get_latest_segment() + if transaction_id is None: + report_error('This repository contains no valid data.') + return False if repair: self.io.cleanup(transaction_id) segments_transaction_id = self.io.get_segments_transaction_id() diff --git a/src/borg/testsuite/archiver.py b/src/borg/testsuite/archiver.py index a978fa0f1..490086503 100644 --- a/src/borg/testsuite/archiver.py +++ b/src/borg/testsuite/archiver.py @@ -2154,6 +2154,41 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase): def test_verify_data_unencrypted(self): self._test_verify_data('--encryption', 'none') + def test_empty_repository(self): + with Repository(self.repository_location, exclusive=True) as repository: + for id_ in repository.list(): + repository.delete(id_) + repository.commit() + self.cmd('check', self.repository_location, exit_code=1) + + def test_attic013_acl_bug(self): + # Attic up to release 0.13 contained a bug where every item unintentionally received + # a b'acl'=None key-value pair. + # This bug can still live on in Borg repositories (through borg upgrade). + class Attic013Item: + as_dict = lambda: { + # These are required + b'path': '1234', + b'mtime': 0, + b'mode': 0, + b'user': b'0', + b'group': b'0', + b'uid': 0, + b'gid': 0, + # acl is the offending key. 
+ b'acl': None, + } + + archive, repository = self.open_archive('archive1') + with repository: + manifest, key = Manifest.load(repository) + with Cache(repository, key, manifest) as cache: + archive = Archive(repository, key, manifest, '0.13', cache=cache, create=True) + archive.items_buffer.add(Attic013Item) + archive.save() + self.cmd('check', self.repository_location, exit_code=0) + self.cmd('list', self.repository_location + '::0.13', exit_code=0) + @pytest.mark.skipif(sys.platform == 'cygwin', reason='remote is broken on cygwin and hangs') class RemoteArchiverTestCase(ArchiverTestCase): diff --git a/src/borg/xattr.py b/src/borg/xattr.py index 6da25bfc5..c3d51cb21 100644 --- a/src/borg/xattr.py +++ b/src/borg/xattr.py @@ -12,9 +12,6 @@ from distutils.version import LooseVersion from .helpers import Buffer -from .logger import create_logger -logger = create_logger() - try: ENOATTR = errno.ENOATTR @@ -68,7 +65,7 @@ if libc_name is None: libc_name = 'libc.dylib' else: msg = "Can't find C library. No fallback known. Try installing ldconfig, gcc/cc or objdump." - logger.error(msg) + print(msg, file=sys.stderr) # logger isn't initialized at this stage raise Exception(msg) # If we are running with fakeroot on Linux, then use the xattr functions of fakeroot. This is needed by