diff --git a/docs/changes.rst b/docs/changes.rst index 95e1c612a..3af543990 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -130,13 +130,48 @@ Security fixes: - fix security issue with remote repository access, #1428 + +Version 1.0.7rc2 (2016-08-13) +----------------------------- + Bug fixes: - do not write objects to repository that are bigger than the allowed size, borg will reject reading them, #1451. - IMPORTANT: if you created archives with many millions of files or - directories, please verify if you can open them successfully, - e.g. try a "borg list REPO::ARCHIVE". + + Important: if you created archives with many millions of files or + directories, please verify if you can open them successfully, + e.g. try a "borg list REPO::ARCHIVE". +- lz4 compression: dynamically enlarge the (de)compression buffer, the static + buffer was not big enough for archives with extremely many items, #1453 +- larger item metadata stream chunks, raise archive item limit by 8x, #1452 +- fix untracked segments made by moved DELETEs, #1442 + + Impact: Previously (metadata) segments could become untracked when deleting data, + these would never be cleaned up. +- extended attributes (xattrs) related fixes: + + - fixed a race condition in xattrs querying that led to the entire file not + being backed up (while logging the error, exit code = 1), #1469 + - fixed a race condition in xattrs querying that led to a crash, #1462 + - raise OSError including the error message derived from errno, deal with + path being a integer FD + +Other changes: + +- print active env var override by default, #1467 +- xattr module: refactor code, deduplicate, clean up +- repository: split object size check into too small and too big +- add a transaction_id assertion, so borg init on a broken (inconsistent) + filesystem does not look like a coding error in borg, but points to the + real problem. +- explain confusing TypeError caused by compat support for old servers, #1456 +- add forgotten usage help file from build_usage +- refactor/unify buffer code into helpers.Buffer class, add tests +- docs: + + - document archive limitation, #1452 + - improve prune examples Version 1.0.7rc1 (2016-08-05) diff --git a/docs/faq.rst b/docs/faq.rst index 7f1611036..4b6b68378 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -62,6 +62,17 @@ Which file types, attributes, etc. are *not* preserved? holes in a sparse file. * filesystem specific attributes, like ext4 immutable bit, see :issue:`618`. +Are there other known limitations? +---------------------------------- + +- A single archive can only reference a limited volume of file/dir metadata, + usually corresponding to tens or hundreds of millions of files/dirs. + When trying to go beyond that limit, you will get a fatal IntegrityError + exception telling that the (archive) object is too big. + An easy workaround is to create multiple archives with less items each. + See also the :ref:`archive_limitation` and :issue:`1452`. + + Why is my backup bigger than with attic? Why doesn't |project_name| do compression by default? ---------------------------------------------------------------------------------------------- diff --git a/docs/internals.rst b/docs/internals.rst index b088f68eb..82be188bf 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -160,12 +160,40 @@ object that contains: * version * name -* list of chunks containing item metadata +* list of chunks containing item metadata (size: count * ~40B) * cmdline * hostname * username * time +.. _archive_limitation: + +Note about archive limitations +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The archive is currently stored as a single object in the repository +and thus limited in size to MAX_OBJECT_SIZE (20MiB). + +As one chunk list entry is ~40B, that means we can reference ~500.000 item +metadata stream chunks per archive. + +Each item metadata stream chunk is ~128kiB (see hardcoded ITEMS_CHUNKER_PARAMS). + +So that means the whole item metadata stream is limited to ~64GiB chunks. +If compression is used, the amount of storable metadata is bigger - by the +compression factor. + +If the medium size of an item entry is 100B (small size file, no ACLs/xattrs), +that means a limit of ~640 million files/directories per archive. + +If the medium size of an item entry is 2kB (~100MB size files or more +ACLs/xattrs), the limit will be ~32 million files/directories per archive. + +If one tries to create an archive object bigger than MAX_OBJECT_SIZE, a fatal +IntegrityError will be raised. + +A workaround is to create multiple archives with less items each, see +also :issue:`1452`. The Item -------- @@ -174,7 +202,7 @@ Each item represents a file, directory or other fs item and is stored as an ``item`` dictionary that contains: * path -* list of data chunks +* list of data chunks (size: count * ~40B) * user * group * uid diff --git a/docs/usage.rst b/docs/usage.rst index 215871508..d9212ef56 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -439,8 +439,8 @@ you restrict its operation to a subset of the archives using ``--prefix``. When using ``--prefix``, be careful to choose a good prefix - e.g. do not use a prefix "foo" if you do not also want to match "foobar". -It is strongly recommended to always run ``prune --dry-run ...`` first so you -will see what it would do without it actually doing anything. +It is strongly recommended to always run ``prune -v --list --dry-run ...`` +first so you will see what it would do without it actually doing anything. There is also a visualized prune example in ``docs/misc/prune-example.txt``. @@ -448,19 +448,19 @@ There is also a visualized prune example in ``docs/misc/prune-example.txt``. # Keep 7 end of day and 4 additional end of week archives. # Do a dry-run without actually deleting anything. - $ borg prune --dry-run --keep-daily=7 --keep-weekly=4 /path/to/repo + $ borg prune -v --list --dry-run --keep-daily=7 --keep-weekly=4 /path/to/repo # Same as above but only apply to archive names starting with the hostname # of the machine followed by a "-" character: - $ borg prune --keep-daily=7 --keep-weekly=4 --prefix='{hostname}-' /path/to/repo + $ borg prune -v --list --keep-daily=7 --keep-weekly=4 --prefix='{hostname}-' /path/to/repo # Keep 7 end of day, 4 additional end of week archives, # and an end of month archive for every month: - $ borg prune --keep-daily=7 --keep-weekly=4 --keep-monthly=-1 /path/to/repo + $ borg prune -v --list --keep-daily=7 --keep-weekly=4 --keep-monthly=-1 /path/to/repo # Keep all backups in the last 10 days, 4 additional end of week archives, # and an end of month archive for every month: - $ borg prune --keep-within=10d --keep-weekly=4 --keep-monthly=-1 /path/to/repo + $ borg prune -v --list --keep-within=10d --keep-weekly=4 --keep-monthly=-1 /path/to/repo .. include:: usage/info.rst.inc diff --git a/src/borg/archive.py b/src/borg/archive.py index 52a03af85..35ccff0a1 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -169,7 +169,7 @@ class DownloadPipeline: class ChunkBuffer: - BUFFER_SIZE = 1 * 1024 * 1024 + BUFFER_SIZE = 8 * 1024 * 1024 def __init__(self, key, chunker_params=ITEMS_CHUNKER_PARAMS): self.buffer = BytesIO() diff --git a/src/borg/compress.pyx b/src/borg/compress.pyx index 13955a86b..f0dd0b1b3 100644 --- a/src/borg/compress.pyx +++ b/src/borg/compress.pyx @@ -1,25 +1,18 @@ -import threading import zlib try: import lzma except ImportError: lzma = None +from .helpers import Buffer + cdef extern from "lz4.h": int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) nogil int LZ4_decompress_safe(const char* source, char* dest, int inputSize, int maxOutputSize) nogil int LZ4_compressBound(int inputSize) nogil -thread_local = threading.local() -thread_local.buffer = bytes() - - -cdef char *get_buffer(size): - size = int(size) - if len(thread_local.buffer) < size: - thread_local.buffer = bytes(size) - return thread_local.buffer +buffer = Buffer(bytearray, size=0) cdef class CompressorBase: @@ -88,7 +81,8 @@ class LZ4(CompressorBase): cdef char *source = idata cdef char *dest osize = LZ4_compressBound(isize) - dest = get_buffer(osize) + buf = buffer.get(osize) + dest = buf with nogil: osize = LZ4_compress_limitedOutput(source, dest, isize, osize) if not osize: @@ -108,7 +102,8 @@ class LZ4(CompressorBase): # allocate more if isize * 3 is already bigger, to avoid having to resize often. osize = max(int(1.1 * 2**23), isize * 3) while True: - dest = get_buffer(osize) + buf = buffer.get(osize) + dest = buf with nogil: rsize = LZ4_decompress_safe(source, dest, isize, osize) if rsize >= 0: diff --git a/src/borg/constants.py b/src/borg/constants.py index 857641f65..d83c41f20 100644 --- a/src/borg/constants.py +++ b/src/borg/constants.py @@ -41,7 +41,7 @@ HASH_MASK_BITS = 21 # results in ~2MiB chunks statistically CHUNKER_PARAMS = (CHUNK_MIN_EXP, CHUNK_MAX_EXP, HASH_MASK_BITS, HASH_WINDOW_SIZE) # chunker params for the items metadata stream, finer granularity -ITEMS_CHUNKER_PARAMS = (12, 16, 14, HASH_WINDOW_SIZE) +ITEMS_CHUNKER_PARAMS = (15, 19, 17, HASH_WINDOW_SIZE) # return codes returned by borg command # when borg is killed by signal N, rc = 128 + N diff --git a/src/borg/helpers.py b/src/borg/helpers.py index 1761b5d7f..2324d32a9 100644 --- a/src/borg/helpers.py +++ b/src/borg/helpers.py @@ -10,9 +10,10 @@ import pwd import re import signal import socket -import sys import stat +import sys import textwrap +import threading import time import unicodedata import uuid @@ -38,7 +39,6 @@ from . import crypto from . import hashindex from . import shellpattern from .constants import * # NOQA -from .compress import get_compressor # meta dict, data bytes _Chunk = namedtuple('_Chunk', 'meta data') @@ -680,6 +680,47 @@ def memoize(function): return decorated_function +class Buffer: + """ + provide a thread-local buffer + """ + def __init__(self, allocator, size=4096, limit=None): + """ + Initialize the buffer: use allocator(size) call to allocate a buffer. + Optionally, set the upper for the buffer size. + """ + assert callable(allocator), 'must give alloc(size) function as first param' + assert limit is None or size <= limit, 'initial size must be <= limit' + self._thread_local = threading.local() + self.allocator = allocator + self.limit = limit + self.resize(size, init=True) + + def __len__(self): + return len(self._thread_local.buffer) + + def resize(self, size, init=False): + """ + resize the buffer - to avoid frequent reallocation, we usually always grow (if needed). + giving init=True it is possible to first-time initialize or shrink the buffer. + if a buffer size beyond the limit is requested, raise ValueError. + """ + size = int(size) + if self.limit is not None and size > self.limit: + raise ValueError('Requested buffer size %d is above the limit of %d.' % (size, self.limit)) + if init or len(self) < size: + self._thread_local.buffer = self.allocator(size) + + def get(self, size=None, init=False): + """ + return a buffer of at least the requested size (None: any current size). + init=True can be given to trigger shrinking of the buffer to the given size. + """ + if size is not None: + self.resize(size, init) + return self._thread_local.buffer + + @memoize def uid2user(uid, default=None): try: @@ -945,7 +986,7 @@ DEFAULTISH = ('Default', 'DEFAULT', 'default', 'D', 'd', '', ) def yes(msg=None, false_msg=None, true_msg=None, default_msg=None, - retry_msg=None, invalid_msg=None, env_msg=None, + retry_msg=None, invalid_msg=None, env_msg='{} (from {})', falsish=FALSISH, truish=TRUISH, defaultish=DEFAULTISH, default=False, retry=True, env_var_override=None, ofile=None, input=input): """Output (usually a question) and let user input an answer. @@ -966,8 +1007,8 @@ def yes(msg=None, false_msg=None, true_msg=None, default_msg=None, :param true_msg: message to output before returning True [None] :param default_msg: message to output before returning a [None] :param invalid_msg: message to output after a invalid answer was given [None] - :param env_msg: message to output when using input from env_var_override [None], - needs to have 2 placeholders for answer and env var name, e.g.: "{} (from {})" + :param env_msg: message to output when using input from env_var_override ['{} (from {})'], + needs to have 2 placeholders for answer and env var name :param falsish: sequence of answers qualifying as False :param truish: sequence of answers qualifying as True :param defaultish: sequence of answers qualifying as @@ -1542,6 +1583,7 @@ class CompressionDecider2: return compr_spec, chunk def heuristic_lz4(self, compr_args, chunk): + from .compress import get_compressor meta, data = chunk lz4 = get_compressor('lz4') cdata = lz4.compress(data) diff --git a/src/borg/remote.py b/src/borg/remote.py index 47c810fdd..ff057b7b0 100644 --- a/src/borg/remote.py +++ b/src/borg/remote.py @@ -189,6 +189,16 @@ class RemoteRepository: except self.RPCError as err: if err.remote_type != 'TypeError': raise + msg = """\ +Please note: +If you see a TypeError complaining about the number of positional arguments +given to open(), you can ignore it if it comes from a borg version < 1.0.7. +This TypeError is a cosmetic side effect of the compatibility code borg +clients >= 1.0.7 have to support older borg servers. +This problem will go away as soon as the server has been upgraded to 1.0.7+. +""" + # emit this msg in the same way as the "Remote: ..." lines that show the remote TypeError + sys.stderr.write(msg) if append_only: raise self.NoAppendOnlyOnServer() self.id = self.call('open', self.location.path, create, lock_wait, lock) diff --git a/src/borg/repository.py b/src/borg/repository.py index c4923f154..794f6fdc0 100644 --- a/src/borg/repository.py +++ b/src/borg/repository.py @@ -344,6 +344,7 @@ class Repository: b'segments': self.segments, b'compact': self.compact} transaction_id = self.io.get_segments_transaction_id() + assert transaction_id is not None hints_file = os.path.join(self.path, 'hints.%d' % transaction_id) with open(hints_file + '.tmp', 'wb') as fd: msgpack.pack(hints, fd) @@ -448,11 +449,46 @@ class Repository: segments[segment] -= 1 elif tag == TAG_DELETE: if index_transaction_id is None or segment > index_transaction_id: + # (introduced in 6425d16aa84be1eaaf88) + # This is needed to avoid object un-deletion if we crash between the commit and the deletion + # of old segments in complete_xfer(). + # + # However, this only happens if the crash also affects the FS to the effect that file deletions + # did not materialize consistently after journal recovery. If they always materialize in-order + # then this is not a problem, because the old segment containing a deleted object would be deleted + # before the segment containing the delete. + # + # Consider the following series of operations if we would not do this, ie. this entire if: + # would be removed. + # Columns are segments, lines are different keys (line 1 = some key, line 2 = some other key) + # Legend: P=TAG_PUT, D=TAG_DELETE, c=commit, i=index is written for latest commit + # + # Segment | 1 | 2 | 3 + # --------+-------+-----+------ + # Key 1 | P | D | + # Key 2 | P | | P + # commits | c i | c | c i + # --------+-------+-----+------ + # ^- compact_segments starts + # ^- complete_xfer commits, after that complete_xfer deletes + # segments 1 and 2 (and then the index would be written). + # + # Now we crash. But only segment 2 gets deleted, while segment 1 is still around. Now key 1 + # is suddenly undeleted (because the delete in segment 2 is now missing). + # Again, note the requirement here. We delete these in the correct order that this doesn't happen, + # and only if the FS materialization of these deletes is reordered or parts dropped this can happen. + # In this case it doesn't cause outright corruption, 'just' an index count mismatch, which will be + # fixed by borg-check --repair. + # + # Note that in this check the index state is the proxy for a "most definitely settled" repository state, + # ie. the assumption is that *all* operations on segments <= index state are completed and stable. try: - self.io.write_delete(key, raise_full=True) + new_segment, size = self.io.write_delete(key, raise_full=True) except LoggedIO.SegmentFull: complete_xfer() - self.io.write_delete(key) + new_segment, size = self.io.write_delete(key) + self.compact[new_segment] += size + segments.setdefault(new_segment, 0) assert segments[segment] == 0 unused.append(segment) complete_xfer(intermediate=False) diff --git a/src/borg/testsuite/archive.py b/src/borg/testsuite/archive.py index 30f619747..19db1a44c 100644 --- a/src/borg/testsuite/archive.py +++ b/src/borg/testsuite/archive.py @@ -109,7 +109,7 @@ class ChunkBufferTestCase(BaseTestCase): self.assert_equal(data, [Item(internal_dict=d) for d in unpacker]) def test_partial(self): - big = "0123456789" * 10000 + big = "0123456789abcdefghijklmnopqrstuvwxyz" * 25000 data = [Item(path='full', source=big), Item(path='partial', source=big)] cache = MockCache() key = PlaintextKey(None) diff --git a/src/borg/testsuite/helpers.py b/src/borg/testsuite/helpers.py index e3b160724..0f8a853e0 100644 --- a/src/borg/testsuite/helpers.py +++ b/src/borg/testsuite/helpers.py @@ -10,6 +10,7 @@ import msgpack import msgpack.fallback from ..helpers import Location +from ..helpers import Buffer from ..helpers import partial_format, format_file_size, parse_file_size, format_timedelta, format_line, PlaceholderError from ..helpers import make_path_safe, clean_lines from ..helpers import prune_within, prune_split @@ -713,6 +714,61 @@ def test_is_slow_msgpack(): assert not is_slow_msgpack() +class TestBuffer: + def test_type(self): + buffer = Buffer(bytearray) + assert isinstance(buffer.get(), bytearray) + buffer = Buffer(bytes) # don't do that in practice + assert isinstance(buffer.get(), bytes) + + def test_len(self): + buffer = Buffer(bytearray, size=0) + b = buffer.get() + assert len(buffer) == len(b) == 0 + buffer = Buffer(bytearray, size=1234) + b = buffer.get() + assert len(buffer) == len(b) == 1234 + + def test_resize(self): + buffer = Buffer(bytearray, size=100) + assert len(buffer) == 100 + b1 = buffer.get() + buffer.resize(200) + assert len(buffer) == 200 + b2 = buffer.get() + assert b2 is not b1 # new, bigger buffer + buffer.resize(100) + assert len(buffer) >= 100 + b3 = buffer.get() + assert b3 is b2 # still same buffer (200) + buffer.resize(100, init=True) + assert len(buffer) == 100 # except on init + b4 = buffer.get() + assert b4 is not b3 # new, smaller buffer + + def test_limit(self): + buffer = Buffer(bytearray, size=100, limit=200) + buffer.resize(200) + assert len(buffer) == 200 + with pytest.raises(ValueError): + buffer.resize(201) + assert len(buffer) == 200 + + def test_get(self): + buffer = Buffer(bytearray, size=100, limit=200) + b1 = buffer.get(50) + assert len(b1) >= 50 # == 100 + b2 = buffer.get(100) + assert len(b2) >= 100 # == 100 + assert b2 is b1 # did not need resizing yet + b3 = buffer.get(200) + assert len(b3) == 200 + assert b3 is not b2 # new, resized buffer + with pytest.raises(ValueError): + buffer.get(201) # beyond limit + assert len(buffer) == 200 + + def test_yes_input(): inputs = list(TRUISH) input = FakeInputs(inputs) @@ -804,6 +860,16 @@ def test_yes_output(capfd): assert 'false-msg' in err +def test_yes_env_output(capfd, monkeypatch): + env_var = 'OVERRIDE_SOMETHING' + monkeypatch.setenv(env_var, 'yes') + assert yes(env_var_override=env_var) + out, err = capfd.readouterr() + assert out == '' + assert env_var in err + assert 'yes' in err + + def test_progress_percentage_multiline(capfd): pi = ProgressIndicatorPercent(1000, step=5, start=0, same_line=False, msg="%3.0f%%") pi.show(0) diff --git a/src/borg/testsuite/repository.py b/src/borg/testsuite/repository.py index 0fc1461de..25e93101a 100644 --- a/src/borg/testsuite/repository.py +++ b/src/borg/testsuite/repository.py @@ -13,8 +13,9 @@ from ..helpers import Location from ..helpers import IntegrityError from ..locking import Lock, LockFailed from ..remote import RemoteRepository, InvalidRPCMethod, ConnectionClosedWithHint, handle_remote_line -from ..repository import Repository, LoggedIO, MAGIC, MAX_DATA_SIZE +from ..repository import Repository, LoggedIO, MAGIC, MAX_DATA_SIZE, TAG_DELETE from . import BaseTestCase +from .hashindex import H UNSPECIFIED = object() # for default values where we can't use None @@ -272,6 +273,28 @@ class RepositoryCommitTestCase(RepositoryTestCaseBase): io = self.repository.io assert not io.is_committed_segment(io.get_latest_segment()) + def test_moved_deletes_are_tracked(self): + self.repository.put(H(1), b'1') + self.repository.put(H(2), b'2') + self.repository.commit() + self.repository.delete(H(1)) + self.repository.commit() + last_segment = self.repository.io.get_latest_segment() - 1 + num_deletes = 0 + for tag, key, offset, size in self.repository.io.iter_objects(last_segment): + if tag == TAG_DELETE: + assert key == H(1) + num_deletes += 1 + assert num_deletes == 1 + assert last_segment in self.repository.compact + self.repository.put(H(3), b'3') + self.repository.commit() + assert last_segment not in self.repository.compact + assert not self.repository.io.segment_exists(last_segment) + for segment, _ in self.repository.io.segment_iterator(): + for tag, key, offset, size in self.repository.io.iter_objects(segment): + assert tag != TAG_DELETE + class RepositoryAppendOnlyTestCase(RepositoryTestCaseBase): def open(self, create=False): diff --git a/src/borg/testsuite/xattr.py b/src/borg/testsuite/xattr.py index df0130c90..5693c753e 100644 --- a/src/borg/testsuite/xattr.py +++ b/src/borg/testsuite/xattr.py @@ -2,7 +2,7 @@ import os import tempfile import unittest -from ..xattr import is_enabled, getxattr, setxattr, listxattr +from ..xattr import is_enabled, getxattr, setxattr, listxattr, buffer from . import BaseTestCase @@ -38,3 +38,23 @@ class XattrTestCase(BaseTestCase): self.assert_equal(getxattr(self.tmpfile.fileno(), 'user.foo'), b'bar') self.assert_equal(getxattr(self.symlink, 'user.foo'), b'bar') self.assert_equal(getxattr(self.tmpfile.name, 'user.empty'), None) + + def test_listxattr_buffer_growth(self): + # make it work even with ext4, which imposes rather low limits + buffer.resize(size=64, init=True) + # xattr raw key list will be size 9 * (10 + 1), which is > 64 + keys = ['user.attr%d' % i for i in range(9)] + for key in keys: + setxattr(self.tmpfile.name, key, b'x') + got_keys = listxattr(self.tmpfile.name) + self.assert_equal_se(got_keys, keys) + self.assert_equal(len(buffer), 128) + + def test_getxattr_buffer_growth(self): + # make it work even with ext4, which imposes rather low limits + buffer.resize(size=64, init=True) + value = b'x' * 126 + setxattr(self.tmpfile.name, 'user.big', value) + got_value = getxattr(self.tmpfile.name, 'user.big') + self.assert_equal(value, got_value) + self.assert_equal(len(buffer), 128) diff --git a/src/borg/xattr.py b/src/borg/xattr.py index 9f4ab34d0..6da25bfc5 100644 --- a/src/borg/xattr.py +++ b/src/borg/xattr.py @@ -1,5 +1,5 @@ -"""A basic extended attributes (xattr) implementation for Linux and MacOS X -""" +"""A basic extended attributes (xattr) implementation for Linux, FreeBSD and MacOS X.""" + import errno import os import re @@ -10,10 +10,22 @@ from ctypes import CDLL, create_string_buffer, c_ssize_t, c_size_t, c_char_p, c_ from ctypes.util import find_library from distutils.version import LooseVersion +from .helpers import Buffer + from .logger import create_logger logger = create_logger() +try: + ENOATTR = errno.ENOATTR +except AttributeError: + # on some platforms, ENOATTR is missing, use ENODATA there + ENOATTR = errno.ENODATA + + +buffer = Buffer(create_string_buffer, limit=2**24) + + def is_enabled(path=None): """Determine if xattr is enabled on the filesystem """ @@ -27,12 +39,22 @@ def is_enabled(path=None): def get_all(path, follow_symlinks=True): try: - return dict((name, getxattr(path, name, follow_symlinks=follow_symlinks)) - for name in listxattr(path, follow_symlinks=follow_symlinks)) + result = {} + names = listxattr(path, follow_symlinks=follow_symlinks) + for name in names: + try: + result[name] = getxattr(path, name, follow_symlinks=follow_symlinks) + except OSError as e: + # if we get ENOATTR, a race has happened: xattr names were deleted after list. + # we just ignore the now missing ones. if you want consistency, do snapshots. + if e.errno != ENOATTR: + raise + return result except OSError as e: if e.errno in (errno.ENOTSUP, errno.EPERM): return {} + libc_name = find_library('c') if libc_name is None: # find_library didn't work, maybe we are on some minimal system that misses essential @@ -75,11 +97,88 @@ except OSError as e: raise Exception(msg) -def _check(rv, path=None): +def split_string0(buf): + """split a list of zero-terminated strings into python not-zero-terminated bytes""" + return buf.split(b'\0')[:-1] + + +def split_lstring(buf): + """split a list of length-prefixed strings into python not-length-prefixed bytes""" + result = [] + mv = memoryview(buf) + while mv: + length = mv[0] + result.append(bytes(mv[1:1 + length])) + mv = mv[1 + length:] + return result + + +class BufferTooSmallError(Exception): + """the buffer given to an xattr function was too small for the result""" + + +def _check(rv, path=None, detect_buffer_too_small=False): if rv < 0: - raise OSError(get_errno(), path) + e = get_errno() + if detect_buffer_too_small and e == errno.ERANGE: + # listxattr and getxattr signal with ERANGE that they need a bigger result buffer. + # setxattr signals this way that e.g. a xattr key name is too long / inacceptable. + raise BufferTooSmallError + else: + try: + msg = os.strerror(e) + except ValueError: + msg = '' + if isinstance(path, int): + path = '' % path + raise OSError(e, msg, path) + if detect_buffer_too_small and rv >= len(buffer): + # freebsd does not error with ERANGE if the buffer is too small, + # it just fills the buffer, truncates and returns. + # so, we play sure and just assume that result is truncated if + # it happens to be a full buffer. + raise BufferTooSmallError return rv + +def _listxattr_inner(func, path): + if isinstance(path, str): + path = os.fsencode(path) + size = len(buffer) + while True: + buf = buffer.get(size) + try: + n = _check(func(path, buf, size), path, detect_buffer_too_small=True) + except BufferTooSmallError: + size *= 2 + else: + return n, buf.raw + + +def _getxattr_inner(func, path, name): + if isinstance(path, str): + path = os.fsencode(path) + name = os.fsencode(name) + size = len(buffer) + while True: + buf = buffer.get(size) + try: + n = _check(func(path, name, buf, size), path, detect_buffer_too_small=True) + except BufferTooSmallError: + size *= 2 + else: + return n, buf.raw + + +def _setxattr_inner(func, path, name, value): + if isinstance(path, str): + path = os.fsencode(path) + name = os.fsencode(name) + value = value and os.fsencode(value) + size = len(value) if value else 0 + _check(func(path, name, value, size), path, detect_buffer_too_small=False) + + if sys.platform.startswith('linux'): # pragma: linux only libc.llistxattr.argtypes = (c_char_p, c_char_p, c_size_t) libc.llistxattr.restype = c_ssize_t @@ -95,54 +194,44 @@ if sys.platform.startswith('linux'): # pragma: linux only libc.fgetxattr.restype = c_ssize_t def listxattr(path, *, follow_symlinks=True): - if isinstance(path, str): - path = os.fsencode(path) - if isinstance(path, int): - func = libc.flistxattr - elif follow_symlinks: - func = libc.listxattr - else: - func = libc.llistxattr - n = _check(func(path, None, 0), path) - if n == 0: - return [] - namebuf = create_string_buffer(n) - n2 = _check(func(path, namebuf, n), path) - if n2 != n: - raise Exception('listxattr failed') - return [os.fsdecode(name) for name in namebuf.raw.split(b'\0')[:-1] if not name.startswith(b'system.posix_acl_')] + def func(path, buf, size): + if isinstance(path, int): + return libc.flistxattr(path, buf, size) + else: + if follow_symlinks: + return libc.listxattr(path, buf, size) + else: + return libc.llistxattr(path, buf, size) + + n, buf = _listxattr_inner(func, path) + return [os.fsdecode(name) for name in split_string0(buf[:n]) + if not name.startswith(b'system.posix_acl_')] def getxattr(path, name, *, follow_symlinks=True): - name = os.fsencode(name) - if isinstance(path, str): - path = os.fsencode(path) - if isinstance(path, int): - func = libc.fgetxattr - elif follow_symlinks: - func = libc.getxattr - else: - func = libc.lgetxattr - n = _check(func(path, name, None, 0)) - if n == 0: - return - valuebuf = create_string_buffer(n) - n2 = _check(func(path, name, valuebuf, n), path) - if n2 != n: - raise Exception('getxattr failed') - return valuebuf.raw + def func(path, name, buf, size): + if isinstance(path, int): + return libc.fgetxattr(path, name, buf, size) + else: + if follow_symlinks: + return libc.getxattr(path, name, buf, size) + else: + return libc.lgetxattr(path, name, buf, size) + + n, buf = _getxattr_inner(func, path, name) + return buf[:n] or None def setxattr(path, name, value, *, follow_symlinks=True): - name = os.fsencode(name) - value = value and os.fsencode(value) - if isinstance(path, str): - path = os.fsencode(path) - if isinstance(path, int): - func = libc.fsetxattr - elif follow_symlinks: - func = libc.setxattr - else: - func = libc.lsetxattr - _check(func(path, name, value, len(value) if value else 0, 0), path) + def func(path, name, value, size): + flags = 0 + if isinstance(path, int): + return libc.fsetxattr(path, name, value, size, flags) + else: + if follow_symlinks: + return libc.setxattr(path, name, value, size, flags) + else: + return libc.lsetxattr(path, name, value, size, flags) + + _setxattr_inner(func, path, name, value) elif sys.platform == 'darwin': # pragma: darwin only libc.listxattr.argtypes = (c_char_p, c_char_p, c_size_t, c_int) @@ -158,60 +247,48 @@ elif sys.platform == 'darwin': # pragma: darwin only libc.fgetxattr.argtypes = (c_int, c_char_p, c_char_p, c_size_t, c_uint32, c_int) libc.fgetxattr.restype = c_ssize_t + XATTR_NOFLAGS = 0x0000 XATTR_NOFOLLOW = 0x0001 def listxattr(path, *, follow_symlinks=True): - func = libc.listxattr - flags = 0 - if isinstance(path, str): - path = os.fsencode(path) - if isinstance(path, int): - func = libc.flistxattr - elif not follow_symlinks: - flags = XATTR_NOFOLLOW - n = _check(func(path, None, 0, flags), path) - if n == 0: - return [] - namebuf = create_string_buffer(n) - n2 = _check(func(path, namebuf, n, flags), path) - if n2 != n: - raise Exception('listxattr failed') - return [os.fsdecode(name) for name in namebuf.raw.split(b'\0')[:-1]] + def func(path, buf, size): + if isinstance(path, int): + return libc.flistxattr(path, buf, size, XATTR_NOFLAGS) + else: + if follow_symlinks: + return libc.listxattr(path, buf, size, XATTR_NOFLAGS) + else: + return libc.listxattr(path, buf, size, XATTR_NOFOLLOW) + + n, buf = _listxattr_inner(func, path) + return [os.fsdecode(name) for name in split_string0(buf[:n])] def getxattr(path, name, *, follow_symlinks=True): - name = os.fsencode(name) - func = libc.getxattr - flags = 0 - if isinstance(path, str): - path = os.fsencode(path) - if isinstance(path, int): - func = libc.fgetxattr - elif not follow_symlinks: - flags = XATTR_NOFOLLOW - n = _check(func(path, name, None, 0, 0, flags)) - if n == 0: - return - valuebuf = create_string_buffer(n) - n2 = _check(func(path, name, valuebuf, n, 0, flags), path) - if n2 != n: - raise Exception('getxattr failed') - return valuebuf.raw + def func(path, name, buf, size): + if isinstance(path, int): + return libc.fgetxattr(path, name, buf, size, 0, XATTR_NOFLAGS) + else: + if follow_symlinks: + return libc.getxattr(path, name, buf, size, 0, XATTR_NOFLAGS) + else: + return libc.getxattr(path, name, buf, size, 0, XATTR_NOFOLLOW) + + n, buf = _getxattr_inner(func, path, name) + return buf[:n] or None def setxattr(path, name, value, *, follow_symlinks=True): - name = os.fsencode(name) - value = value and os.fsencode(value) - func = libc.setxattr - flags = 0 - if isinstance(path, str): - path = os.fsencode(path) - if isinstance(path, int): - func = libc.fsetxattr - elif not follow_symlinks: - flags = XATTR_NOFOLLOW - _check(func(path, name, value, len(value) if value else 0, 0, flags), path) + def func(path, name, value, size): + if isinstance(path, int): + return libc.fsetxattr(path, name, value, size, 0, XATTR_NOFLAGS) + else: + if follow_symlinks: + return libc.setxattr(path, name, value, size, 0, XATTR_NOFLAGS) + else: + return libc.setxattr(path, name, value, size, 0, XATTR_NOFOLLOW) + + _setxattr_inner(func, path, name, value) elif sys.platform.startswith('freebsd'): # pragma: freebsd only - EXTATTR_NAMESPACE_USER = 0x0001 libc.extattr_list_fd.argtypes = (c_int, c_int, c_char_p, c_size_t) libc.extattr_list_fd.restype = c_ssize_t libc.extattr_list_link.argtypes = (c_char_p, c_int, c_char_p, c_size_t) @@ -230,63 +307,45 @@ elif sys.platform.startswith('freebsd'): # pragma: freebsd only libc.extattr_set_link.restype = c_int libc.extattr_set_file.argtypes = (c_char_p, c_int, c_char_p, c_char_p, c_size_t) libc.extattr_set_file.restype = c_int + ns = EXTATTR_NAMESPACE_USER = 0x0001 def listxattr(path, *, follow_symlinks=True): - ns = EXTATTR_NAMESPACE_USER - if isinstance(path, str): - path = os.fsencode(path) - if isinstance(path, int): - func = libc.extattr_list_fd - elif follow_symlinks: - func = libc.extattr_list_file - else: - func = libc.extattr_list_link - n = _check(func(path, ns, None, 0), path) - if n == 0: - return [] - namebuf = create_string_buffer(n) - n2 = _check(func(path, ns, namebuf, n), path) - if n2 != n: - raise Exception('listxattr failed') - names = [] - mv = memoryview(namebuf.raw) - while mv: - length = mv[0] - names.append(os.fsdecode(bytes(mv[1:1 + length]))) - mv = mv[1 + length:] - return names + def func(path, buf, size): + if isinstance(path, int): + return libc.extattr_list_fd(path, ns, buf, size) + else: + if follow_symlinks: + return libc.extattr_list_file(path, ns, buf, size) + else: + return libc.extattr_list_link(path, ns, buf, size) + + n, buf = _listxattr_inner(func, path) + return [os.fsdecode(name) for name in split_lstring(buf[:n])] def getxattr(path, name, *, follow_symlinks=True): - name = os.fsencode(name) - if isinstance(path, str): - path = os.fsencode(path) - if isinstance(path, int): - func = libc.extattr_get_fd - elif follow_symlinks: - func = libc.extattr_get_file - else: - func = libc.extattr_get_link - n = _check(func(path, EXTATTR_NAMESPACE_USER, name, None, 0)) - if n == 0: - return - valuebuf = create_string_buffer(n) - n2 = _check(func(path, EXTATTR_NAMESPACE_USER, name, valuebuf, n), path) - if n2 != n: - raise Exception('getxattr failed') - return valuebuf.raw + def func(path, name, buf, size): + if isinstance(path, int): + return libc.extattr_get_fd(path, ns, name, buf, size) + else: + if follow_symlinks: + return libc.extattr_get_file(path, ns, name, buf, size) + else: + return libc.extattr_get_link(path, ns, name, buf, size) + + n, buf = _getxattr_inner(func, path, name) + return buf[:n] or None def setxattr(path, name, value, *, follow_symlinks=True): - name = os.fsencode(name) - value = value and os.fsencode(value) - if isinstance(path, str): - path = os.fsencode(path) - if isinstance(path, int): - func = libc.extattr_set_fd - elif follow_symlinks: - func = libc.extattr_set_file - else: - func = libc.extattr_set_link - _check(func(path, EXTATTR_NAMESPACE_USER, name, value, len(value) if value else 0), path) + def func(path, name, value, size): + if isinstance(path, int): + return libc.extattr_set_fd(path, ns, name, value, size) + else: + if follow_symlinks: + return libc.extattr_set_file(path, ns, name, value, size) + else: + return libc.extattr_set_link(path, ns, name, value, size) + + _setxattr_inner(func, path, name, value) else: # pragma: unknown platform only def listxattr(path, *, follow_symlinks=True):