From 8857422035579aa9891f64ee9e8af0b6f84f6084 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 18 May 2017 02:02:51 +0200 Subject: [PATCH 1/5] document follow_symlinks requirements, check libc, fixes #2507 (cherry picked from commit b484c79bc26c7267d81495f65e519b6f70e7c311) --- borg/archiver.py | 8 ++++++-- borg/helpers.py | 10 ++++++++++ docs/installation.rst | 20 ++++++++++++++++++++ 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index a0d335df2..8c0469e50 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -21,7 +21,7 @@ import collections from . import __version__ from .helpers import Error, location_validator, archivename_validator, format_line, format_time, format_file_size, \ parse_pattern, PathPrefixPattern, to_localtime, timestamp, safe_timestamp, bin_to_hex, \ - get_cache_dir, prune_within, prune_split, \ + get_cache_dir, prune_within, prune_split, check_python, \ Manifest, NoManifestError, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \ dir_is_tagged, bigint_to_int, ChunkerParams, CompressionSpec, PrefixSpec, is_slow_msgpack, yes, sysinfo, \ EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR, log_multi, PatternMatcher, ErrorIgnoringTextIOWrapper, set_ec, \ @@ -2079,13 +2079,17 @@ class Archiver: update_excludes(args) return args + def prerun_checks(self, logger): + check_python() + check_extension_modules() + def run(self, args): os.umask(args.umask) # early, before opening files self.lock_wait = args.lock_wait # This works around http://bugs.python.org/issue9351 func = getattr(args, 'func', None) or getattr(args, 'fallback_func') setup_logging(level=args.log_level, is_serve=func == self.do_serve) # do not use loggers before this! - check_extension_modules() + self.prerun_checks(logger) if is_slow_msgpack(): logger.warning("Using a pure-python msgpack! 
This will result in lower performance.") return set_ec(func(args)) diff --git a/borg/helpers.py b/borg/helpers.py index e5f9fbd77..59d3c7804 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -115,6 +115,16 @@ class MandatoryFeatureUnsupported(Error): """Unsupported repository feature(s) {}. A newer version of borg is required to access this repository.""" +class PythonLibcTooOld(Error): + """FATAL: this Python was compiled for a too old (g)libc and misses required functionality.""" + + +def check_python(): + required_funcs = {os.stat, os.utime} + if not os.supports_follow_symlinks.issuperset(required_funcs): + raise PythonLibcTooOld + + def check_extension_modules(): from . import platform, compress if hashindex.API_VERSION != '1.0_01': diff --git a/docs/installation.rst b/docs/installation.rst index fac731ff2..0f21c5521 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -20,6 +20,26 @@ There are different ways to install |project_name|: have the latest code or use revision control (each release is tagged). +.. _installation-requirements: + +Pre-Installation Considerations +------------------------------- + +(G)LIBC requirements +-------------------- + +Borg uses some filesystem functions from Python's `os` standard library module +with `follow_symlinks=False`. These are implemented since quite a while with +the non-symlink-following (g)libc functions like e.g. `lstat` or `lutimes` +(not: `stat` or `utimes`). + +Some stoneage systems (like RHEL/CentOS 5) and also Python interpreter binaries +compiled to be able to run on such systems (like Python installed via Anaconda) +might miss these functions and Borg won't be able to work correctly. +This issue will be detected early and Borg will abort with a fatal error. + +For the Borg binaries, there are additional (g)libc requirements, see below. + .. 
_distribution-package: Distribution Package From 2b3932cac6bd28a6641a232085f79a5cbaba9869 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 18 May 2017 02:37:54 +0200 Subject: [PATCH 2/5] require and use chown with follow_symlinks=False should be equivalent to using os.lchown() before. (cherry picked from commit 094376a8ad23c218defcc69d601be0b014965243) --- borg/archive.py | 2 +- borg/helpers.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/borg/archive.py b/borg/archive.py index 743ec06a1..40f144398 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -472,7 +472,7 @@ Number of files: {0.stats.nfiles}'''.format( if fd: os.fchown(fd, uid, gid) else: - os.lchown(path, uid, gid) + os.chown(path, uid, gid, follow_symlinks=False) except OSError: pass if fd: diff --git a/borg/helpers.py b/borg/helpers.py index 59d3c7804..82a86c3c9 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -120,7 +120,7 @@ class PythonLibcTooOld(Error): def check_python(): - required_funcs = {os.stat, os.utime} + required_funcs = {os.stat, os.utime, os.chown} if not os.supports_follow_symlinks.issuperset(required_funcs): raise PythonLibcTooOld From 10b8cd92187aae98e63cc5ee2ea3b3cbed32716c Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 18 May 2017 02:44:00 +0200 Subject: [PATCH 3/5] use stat with follow_symlinks=False should be equivalent to using os.lstat() before. 
(cherry picked from commit efec00b39cb9be6193c368f9c3c09cdc3a5811a9) --- borg/archive.py | 2 +- borg/archiver.py | 4 ++-- borg/testsuite/__init__.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/borg/archive.py b/borg/archive.py index 40f144398..a343ec3a5 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -381,7 +381,7 @@ Number of files: {0.stats.nfiles}'''.format( path = os.path.join(dest, item[b'path']) # Attempt to remove existing files, ignore errors on failure try: - st = os.lstat(path) + st = os.stat(path, follow_symlinks=False) if stat.S_ISDIR(st.st_mode): os.rmdir(path) else: diff --git a/borg/archiver.py b/borg/archiver.py index 8c0469e50..a2f4c9a70 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -293,7 +293,7 @@ class Archiver: path = os.path.normpath(path) if args.one_file_system: try: - restrict_dev = os.lstat(path).st_dev + restrict_dev = os.stat(path, follow_symlinks=False).st_dev except OSError as e: self.print_warning('%s: %s', path, e) continue @@ -346,7 +346,7 @@ class Archiver: try: with backup_io(): - st = os.lstat(path) + st = os.stat(path, follow_symlinks=False) if (st.st_ino, st.st_dev) in skip_inodes: return # Entering a new filesystem? 
diff --git a/borg/testsuite/__init__.py b/borg/testsuite/__init__.py index 8d757d2bd..9d3a2c0ba 100644 --- a/borg/testsuite/__init__.py +++ b/borg/testsuite/__init__.py @@ -73,8 +73,8 @@ class BaseTestCase(unittest.TestCase): for filename in diff.common: path1 = os.path.join(diff.left, filename) path2 = os.path.join(diff.right, filename) - s1 = os.lstat(path1) - s2 = os.lstat(path2) + s1 = os.stat(path1, follow_symlinks=False) + s2 = os.stat(path2, follow_symlinks=False) # Assume path2 is on FUSE if st_dev is different fuse = s1.st_dev != s2.st_dev attrs = ['st_mode', 'st_uid', 'st_gid', 'st_rdev'] From 3bd71c63f936504052ad2b3cc39686f1709e97a1 Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Tue, 13 Jun 2017 11:42:43 +0200 Subject: [PATCH 4/5] chunker: don't do uint32_t >> 32 (cherry picked from commit 944a4abd58da801c68845ce00e4eab7818bef917) --- borg/_chunker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/_chunker.c b/borg/_chunker.c index 5158009cc..df256ea36 100644 --- a/borg/_chunker.c +++ b/borg/_chunker.c @@ -39,7 +39,7 @@ static uint32_t table_base[] = 0xc5ae37bb, 0xa76ce12a, 0x8150d8f3, 0x2ec29218, 0xa35f0984, 0x48c0647e, 0x0b5ff98c, 0x71893f7b }; -#define BARREL_SHIFT(v, shift) ( ((v) << shift) | ((v) >> (32 - shift)) ) +#define BARREL_SHIFT(v, shift) ( ((v) << shift) | ((v) >> ((32 - shift) & 0x1f)) ) size_t pagemask; From 12b2e1fdd05efe1f54fe14ae42f439dea360a1b2 Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Wed, 14 Jun 2017 19:16:36 +0200 Subject: [PATCH 5/5] chunker: fix invalid use of types With the argument specified as unsigned char *, Cython emits code in the Python wrapper to convert string-like objects to unsigned char* (essentially PyBytes_AS_STRING). Because the len(data) call is performed on a cdef'd string-ish type, Cython emits a strlen() call, on the result of PyBytes_AS_STRING. This is not correct, since embedded null bytes are entirely possible. 
Incidentally, the code generated by Cython was also not correct, since the Clang Static Analyzer found a path of execution where passing arguments in a weird way from Python resulted in strlen(NULL). Formulated like this, Cython emits essentially: c_buzhash( PyBytes_AS_STRING(data), PyObject_Length(data), ... ) which is correct. (cherry picked from commit faf2d0b53777501e48dbc41fe000a4a6aa290f46) --- borg/chunker.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/borg/chunker.pyx b/borg/chunker.pyx index 7ac664ed7..d7a7e9228 100644 --- a/borg/chunker.pyx +++ b/borg/chunker.pyx @@ -48,11 +48,11 @@ cdef class Chunker: return chunker_process(self.chunker) -def buzhash(unsigned char *data, unsigned long seed): +def buzhash(data, unsigned long seed): cdef uint32_t *table cdef uint32_t sum table = buzhash_init_table(seed & 0xffffffff) - sum = c_buzhash(data, len(data), table) + sum = c_buzhash( data, len(data), table) free(table) return sum