2014-04-13 18:26:46 +00:00
|
|
|
import os
|
2014-04-13 21:41:04 +00:00
|
|
|
import re
|
2016-05-17 22:22:49 +00:00
|
|
|
import stat
|
|
|
|
|
2018-11-10 20:43:45 +00:00
|
|
|
from .posix import posix_acl_use_stored_uid_gid
|
|
|
|
from .posix import user2uid, group2gid
|
2019-08-22 23:28:30 +00:00
|
|
|
from ..helpers import workarounds
|
2016-05-31 00:35:54 +00:00
|
|
|
from ..helpers import safe_decode, safe_encode
|
|
|
|
from .base import SyncFile as BaseSyncFile
|
2017-01-29 04:49:53 +00:00
|
|
|
from .base import safe_fadvise
|
2018-06-21 21:53:47 +00:00
|
|
|
from .xattr import _listxattr_inner, _getxattr_inner, _setxattr_inner, split_string0
|
2020-01-02 17:54:00 +00:00
|
|
|
try:
|
|
|
|
from .syncfilerange import sync_file_range, SYNC_FILE_RANGE_WRITE, SYNC_FILE_RANGE_WAIT_BEFORE, SYNC_FILE_RANGE_WAIT_AFTER
|
|
|
|
SYNC_FILE_RANGE_LOADED = True
|
|
|
|
except ImportError:
|
|
|
|
SYNC_FILE_RANGE_LOADED = False
|
2016-05-18 14:08:27 +00:00
|
|
|
|
Improve LoggedIO write performance, make commit mechanism more solid
- Instead of very small (5 MB-ish) segment files, use larger ones
- Request asynchronous write-out or write-through (TODO) where it is supported,
to achieve a continuously high throughput for writes
- Instead of depending on ordered writes (write data, commit tag, sync)
for consistency, do a double-sync commit as more serious RDBMS also do
i.e. write data, sync, write commit tag, sync
Since commits are very expensive in Borg at the moment this makes no
difference performance-wise.
New platform APIs: SyncFile, sync_dir
[x] Naive implementation (equivalent to what Borg did before)
[x] Linux implementation
[ ] Windows implementation
[-] OSX implementation (F_FULLSYNC)
2016-05-14 20:46:41 +00:00
|
|
|
from libc cimport errno
|
2014-04-13 18:26:46 +00:00
|
|
|
|
2019-03-18 20:51:02 +00:00
|
|
|
# Version tag of the API exposed by this compiled module.
# NOTE(review): presumably compared by the importing package to detect a stale build — confirm against the caller.
API_VERSION = '1.2_05'
|
2018-06-21 21:53:47 +00:00
|
|
|
|
2018-07-10 10:08:59 +00:00
|
|
|
cdef extern from "sys/xattr.h":
|
2018-06-21 21:53:47 +00:00
|
|
|
ssize_t c_listxattr "listxattr" (const char *path, char *list, size_t size)
|
|
|
|
ssize_t c_llistxattr "llistxattr" (const char *path, char *list, size_t size)
|
|
|
|
ssize_t c_flistxattr "flistxattr" (int filedes, char *list, size_t size)
|
|
|
|
|
|
|
|
ssize_t c_getxattr "getxattr" (const char *path, const char *name, void *value, size_t size)
|
|
|
|
ssize_t c_lgetxattr "lgetxattr" (const char *path, const char *name, void *value, size_t size)
|
|
|
|
ssize_t c_fgetxattr "fgetxattr" (int filedes, const char *name, void *value, size_t size)
|
|
|
|
|
|
|
|
int c_setxattr "setxattr" (const char *path, const char *name, const void *value, size_t size, int flags)
|
|
|
|
int c_lsetxattr "lsetxattr" (const char *path, const char *name, const void *value, size_t size, int flags)
|
|
|
|
int c_fsetxattr "fsetxattr" (int filedes, const char *name, const void *value, size_t size, int flags)
|
2014-04-13 18:26:46 +00:00
|
|
|
|
|
|
|
cdef extern from "sys/types.h":
|
|
|
|
int ACL_TYPE_ACCESS
|
|
|
|
int ACL_TYPE_DEFAULT
|
|
|
|
|
|
|
|
cdef extern from "sys/acl.h":
|
|
|
|
ctypedef struct _acl_t:
|
|
|
|
pass
|
|
|
|
ctypedef _acl_t *acl_t
|
|
|
|
|
|
|
|
int acl_free(void *obj)
|
|
|
|
acl_t acl_get_file(const char *path, int type)
|
2018-07-05 23:33:32 +00:00
|
|
|
acl_t acl_get_fd(int fd)
|
2018-07-05 20:56:54 +00:00
|
|
|
int acl_set_file(const char *path, int type, acl_t acl)
|
2018-07-05 23:33:32 +00:00
|
|
|
int acl_set_fd(int fd, acl_t acl)
|
2014-04-13 18:26:46 +00:00
|
|
|
acl_t acl_from_text(const char *buf)
|
|
|
|
char *acl_to_text(acl_t acl, ssize_t *len)
|
|
|
|
|
|
|
|
cdef extern from "acl/libacl.h":
|
2014-08-01 13:50:18 +00:00
|
|
|
int acl_extended_file(const char *path)
|
2018-07-05 23:33:32 +00:00
|
|
|
int acl_extended_fd(int fd)
|
2014-04-13 18:26:46 +00:00
|
|
|
|
2016-05-17 22:22:49 +00:00
|
|
|
cdef extern from "linux/fs.h":
|
|
|
|
# ioctls
|
|
|
|
int FS_IOC_SETFLAGS
|
|
|
|
int FS_IOC_GETFLAGS
|
|
|
|
|
|
|
|
# inode flags
|
|
|
|
int FS_NODUMP_FL
|
|
|
|
int FS_IMMUTABLE_FL
|
|
|
|
int FS_APPEND_FL
|
|
|
|
int FS_COMPR_FL
|
|
|
|
|
2016-05-28 21:51:13 +00:00
|
|
|
cdef extern from "sys/ioctl.h":
|
2016-05-17 22:22:49 +00:00
|
|
|
int ioctl(int fildes, int request, ...)
|
|
|
|
|
2017-04-19 09:31:40 +00:00
|
|
|
cdef extern from "unistd.h":
|
|
|
|
int _SC_PAGESIZE
|
|
|
|
long sysconf(int name)
|
|
|
|
|
2016-05-17 22:22:49 +00:00
|
|
|
cdef extern from "string.h":
|
|
|
|
char *strerror(int errnum)
|
2014-04-13 18:26:46 +00:00
|
|
|
|
2014-04-13 21:41:04 +00:00
|
|
|
# Matches a trailing comment in ACL text: optional spaces, then '#' up to the end of the line.
# The ACL converters in this module strip these comments before splitting entries on ':'.
_comment_re = re.compile(' *#.*', re.M)
|
|
|
|
|
2014-04-27 12:17:09 +00:00
|
|
|
|
2018-07-08 12:37:04 +00:00
|
|
|
def listxattr(path, *, follow_symlinks=False):
    """Return the extended attribute names of *path* as a list.

    *path* may be an int (used as a file descriptor, dispatched to flistxattr)
    or a path (dispatched to listxattr when *follow_symlinks* is true, else to
    llistxattr).  Empty names and names starting with ``system.posix_acl_``
    are left out of the result.
    """
    def list_call(path, buf, size):
        # callback handed to _listxattr_inner; picks the matching C variant
        if isinstance(path, int):
            return c_flistxattr(path, <char *> buf, size)
        if follow_symlinks:
            return c_listxattr(path, <char *> buf, size)
        return c_llistxattr(path, <char *> buf, size)

    length, raw = _listxattr_inner(list_call, path)
    names = []
    for attr_name in split_string0(raw[:length]):
        if not attr_name:
            continue
        if attr_name.startswith(b'system.posix_acl_'):
            # ACL attributes are filtered out of the xattr listing
            continue
        names.append(attr_name)
    return names
|
|
|
|
|
|
|
|
|
2018-07-08 12:37:04 +00:00
|
|
|
def getxattr(path, name, *, follow_symlinks=False):
    """Return the value of extended attribute *name* of *path* as bytes.

    *path* may be an int (used as a file descriptor, dispatched to fgetxattr)
    or a path (dispatched to getxattr when *follow_symlinks* is true, else to
    lgetxattr).
    """
    def get_call(path, name, buf, size):
        # callback handed to _getxattr_inner; picks the matching C variant
        if isinstance(path, int):
            return c_fgetxattr(path, name, <char *> buf, size)
        if follow_symlinks:
            return c_getxattr(path, name, <char *> buf, size)
        return c_lgetxattr(path, name, <char *> buf, size)

    length, raw = _getxattr_inner(get_call, path, name)
    value = raw[:length]
    return bytes(value)
|
2018-06-21 21:53:47 +00:00
|
|
|
|
|
|
|
|
2018-07-08 12:37:04 +00:00
|
|
|
def setxattr(path, name, value, *, follow_symlinks=False):
    """Set extended attribute *name* of *path* to *value*.

    *path* may be an int (used as a file descriptor, dispatched to fsetxattr)
    or a path (dispatched to setxattr when *follow_symlinks* is true, else to
    lsetxattr).  The C ``flags`` argument is always 0.
    """
    def set_call(path, name, value, size):
        # callback handed to _setxattr_inner; picks the matching C variant
        no_flags = 0
        if isinstance(path, int):
            return c_fsetxattr(path, name, <char *> value, size, no_flags)
        if follow_symlinks:
            return c_setxattr(path, name, <char *> value, size, no_flags)
        return c_lsetxattr(path, name, <char *> value, size, no_flags)

    _setxattr_inner(set_call, path, name, value)
|
|
|
|
|
|
|
|
|
2016-05-17 22:22:49 +00:00
|
|
|
# Maps BSD-style file flag bits (keys, taken from the stat module) to the
# Linux inode flag bits declared from linux/fs.h (values).  set_flags() walks
# it key -> value, get_flags() walks it value -> key.
BSD_TO_LINUX_FLAGS = {
    stat.UF_NODUMP: FS_NODUMP_FL,
    stat.UF_IMMUTABLE: FS_IMMUTABLE_FL,
    stat.UF_APPEND: FS_APPEND_FL,
    stat.UF_COMPRESSED: FS_COMPR_FL,
}
|
|
|
|
|
|
|
|
|
|
|
|
def set_flags(path, bsd_flags, fd=None):
    """Apply BSD-style *bsd_flags* to a file as Linux inode flags.

    When *fd* is None the file at *path* is opened (and closed again) here;
    block devices, character devices and symlinks are skipped in that case.
    When *fd* is given it is used as-is and left open.

    Raises OSError if the FS_IOC_SETFLAGS ioctl fails with anything other
    than EOPNOTSUPP; EOPNOTSUPP is ignored.
    """
    cdef int flags = 0

    must_open = fd is None
    if must_open:
        mode = os.stat(path, follow_symlinks=False).st_mode
        if stat.S_ISBLK(mode) or stat.S_ISCHR(mode) or stat.S_ISLNK(mode):
            # opening (possibly absent) devices can be slow, and O_NOFOLLOW
            # would make opening a symlink fail anyway.
            return

    # translate every BSD bit that is set into its Linux counterpart
    for bsd_flag, linux_flag in BSD_TO_LINUX_FLAGS.items():
        if bsd_flags & bsd_flag:
            flags |= linux_flag

    if must_open:
        fd = os.open(path, os.O_RDONLY|os.O_NONBLOCK|os.O_NOFOLLOW)
    try:
        if ioctl(fd, FS_IOC_SETFLAGS, &flags) != -1:
            return
        # grab errno right away, before any other call can overwrite it
        error_number = errno.errno
        if error_number != errno.EOPNOTSUPP:
            raise OSError(error_number, strerror(error_number).decode(), path)
    finally:
        if must_open:
            os.close(fd)
|
|
|
|
|
|
|
|
|
2018-07-06 17:41:44 +00:00
|
|
|
def get_flags(path, st, fd=None):
    """Return the Linux inode flags of a file, expressed as BSD-style flag bits.

    *st* is the stat result of the file.  When *fd* is None the file at
    *path* is opened (and closed again) here; otherwise *fd* is used as-is
    and left open.

    Returns 0 for block devices, character devices and symlinks, when the
    file cannot be opened, and when the FS_IOC_GETFLAGS ioctl fails.
    """
    cdef int linux_flags

    mode = st.st_mode
    if stat.S_ISBLK(mode) or stat.S_ISCHR(mode) or stat.S_ISLNK(mode):
        # avoid opening devices files - trying to open non-present devices can be rather slow.
        # avoid opening symlinks, O_NOFOLLOW would make the open() fail anyway.
        return 0

    must_open = fd is None
    if must_open:
        try:
            fd = os.open(path, os.O_RDONLY|os.O_NONBLOCK|os.O_NOFOLLOW)
        except OSError:
            return 0
    try:
        ioctl_failed = ioctl(fd, FS_IOC_GETFLAGS, &linux_flags) == -1
    finally:
        if must_open:
            os.close(fd)
    if ioctl_failed:
        return 0

    # translate every Linux bit that is set into its BSD counterpart
    result = 0
    for bsd_flag, linux_flag in BSD_TO_LINUX_FLAGS.items():
        if linux_flags & linux_flag:
            result |= bsd_flag
    return result
|
|
|
|
|
|
|
|
|
2014-04-27 12:29:03 +00:00
|
|
|
def acl_use_local_uid_gid(acl):
    """Replace the user/group field with the local uid/gid if possible

    *acl* is ACL text as bytes, one ':'-separated entry per line.  For
    entries of kind ``user`` or ``group`` with a non-empty second field, that
    field is replaced by str(user2uid(...)) / str(group2gid(...)), called
    with the second and the fourth field.  Only the first three fields of
    every entry are kept.  Returns the rewritten text as bytes.

    NOTE(review): the fourth field is presumably the stored numeric id used
    as fallback by user2uid/group2gid - confirm against .posix.
    """
    assert isinstance(acl, bytes)
    converted = []
    for line in safe_decode(acl).split('\n'):
        if not line:
            continue
        parts = line.split(':')
        kind = parts[0]
        if kind == 'user' and parts[1]:
            parts[1] = str(user2uid(parts[1], parts[3]))
        elif kind == 'group' and parts[1]:
            parts[1] = str(group2gid(parts[1], parts[3]))
        converted.append(':'.join(parts[:3]))
    return safe_encode('\n'.join(converted))
|
2014-04-27 12:17:09 +00:00
|
|
|
|
|
|
|
|
|
|
|
cdef acl_append_numeric_ids(acl):
    """Extend the "POSIX 1003.1e draft standard 17" format with an additional uid/gid field

    *acl* is ACL text as bytes.  Comments are stripped, then every non-empty
    ``kind:name:permission`` entry of kind ``user`` or ``group`` with a
    non-empty name gets a fourth field: str(user2uid(name, name)) or
    str(group2gid(name, name)).  All other entries are kept unchanged.
    Returns the rewritten text as bytes.
    """
    assert isinstance(acl, bytes)
    text = _comment_re.sub('', safe_decode(acl))
    extended = []
    for line in text.split('\n'):
        if not line:
            continue
        kind, name, permission = line.split(':')
        if name and kind == 'user':
            line = ':'.join([kind, name, permission, str(user2uid(name, name))])
        elif name and kind == 'group':
            line = ':'.join([kind, name, permission, str(group2gid(name, name))])
        extended.append(line)
    return safe_encode('\n'.join(extended))
|
2014-04-13 18:26:46 +00:00
|
|
|
|
|
|
|
|
2014-04-27 12:17:09 +00:00
|
|
|
cdef acl_numeric_ids(acl):
    """Replace the "POSIX 1003.1e draft standard 17" user/group field with uid/gid

    *acl* is ACL text as bytes.  Comments are stripped, then every non-empty
    ``kind:name:permission`` entry of kind ``user`` or ``group`` with a
    non-empty name is rewritten to ``kind:id:permission:id``, where id is
    str(user2uid(name, name)) or str(group2gid(name, name)).  All other
    entries are kept unchanged.  Returns the rewritten text as bytes.
    """
    assert isinstance(acl, bytes)
    text = _comment_re.sub('', safe_decode(acl))
    rewritten = []
    for line in text.split('\n'):
        if not line:
            continue
        kind, name, permission = line.split(':')
        if name and kind == 'user':
            numeric = str(user2uid(name, name))
        elif name and kind == 'group':
            numeric = str(group2gid(name, name))
        else:
            # unnamed entries (and other kinds) pass through untouched
            rewritten.append(line)
            continue
        rewritten.append(':'.join([kind, numeric, permission, numeric]))
    return safe_encode('\n'.join(rewritten))
|
2014-04-13 18:26:46 +00:00
|
|
|
|
|
|
|
|
2021-04-16 13:02:16 +00:00
|
|
|
def acl_get(path, item, st, numeric_ids=False, fd=None):
    """Read the POSIX ACLs of a file and store their text form in *item*.

    path: str or bytes path of the file (a str is fs-encoded).
    item: mapping that receives ``item['acl_access']`` and, for directories,
          ``item['acl_default']``.
    st: stat result of the file; symlinks are skipped, directories also get
        their default ACL read.
    numeric_ids: if true the ACL text is converted with acl_numeric_ids,
                 otherwise with acl_append_numeric_ids.
    fd: optional file descriptor; when given it is used for the "has extended
        ACL" check and for reading the access ACL.

    Returns None.  Nothing is stored if the file is a symlink or has no ACL
    beyond the traditional permission bits.

    Raises OSError if any of the libacl calls fails.
    """
    cdef acl_t default_acl = NULL
    cdef acl_t access_acl = NULL
    cdef char *default_text = NULL
    cdef char *access_text = NULL
    cdef int ret = 0
    # errno is copied into this right after a failing C call, so that building
    # the exception (which runs other code) cannot change the reported error.
    cdef int error_number = 0

    if stat.S_ISLNK(st.st_mode):
        # symlinks can not have ACLs
        return
    if isinstance(path, str):
        path = os.fsencode(path)
    if fd is not None:
        ret = acl_extended_fd(fd)
    else:
        ret = acl_extended_file(path)
    if ret == 0:
        # there is no ACL defining permissions other than those defined by the traditional file permission bits.
        return
    if ret < 0:
        error_number = errno.errno
        raise OSError(error_number, os.strerror(error_number), os.fsdecode(path))
    if numeric_ids:
        converter = acl_numeric_ids
    else:
        converter = acl_append_numeric_ids
    try:
        if fd is not None:
            access_acl = acl_get_fd(fd)
        else:
            access_acl = acl_get_file(path, ACL_TYPE_ACCESS)
        if access_acl == NULL:
            error_number = errno.errno
            raise OSError(error_number, os.strerror(error_number), os.fsdecode(path))
        if stat.S_ISDIR(st.st_mode):
            # only directories can have a default ACL. there is no fd-based api to get it.
            default_acl = acl_get_file(path, ACL_TYPE_DEFAULT)
            if default_acl == NULL:
                error_number = errno.errno
                raise OSError(error_number, os.strerror(error_number), os.fsdecode(path))
        # access_acl is known to be non-NULL here (checked above)
        access_text = acl_to_text(access_acl, NULL)
        if access_text == NULL:
            error_number = errno.errno
            raise OSError(error_number, os.strerror(error_number), os.fsdecode(path))
        item['acl_access'] = converter(access_text)
        if default_acl != NULL:
            default_text = acl_to_text(default_acl, NULL)
            if default_text == NULL:
                error_number = errno.errno
                raise OSError(error_number, os.strerror(error_number), os.fsdecode(path))
            item['acl_default'] = converter(default_text)
    finally:
        # release everything that was allocated by libacl, on success and on error
        acl_free(access_text)
        acl_free(access_acl)
        acl_free(default_text)
        acl_free(default_acl)
|
|
|
|
|
|
|
|
|
2021-04-16 13:02:16 +00:00
|
|
|
def acl_set(path, item, numeric_ids=False, fd=None):
    """Apply the ACLs stored in *item* to a file.

    path: str or bytes path of the file (a str is fs-encoded).
    item: mapping; ``item['acl_access']`` and ``item['acl_default']`` (ACL
          text, if present) are applied, ``item['mode']`` (default 0) is used
          to detect symlinks.
    numeric_ids: if true the stored text is converted with
                 posix_acl_use_stored_uid_gid, otherwise with
                 acl_use_local_uid_gid.
    fd: optional file descriptor; when given, the access ACL is set through
        it.  The default ACL is always set by path.

    Returns None.  Does nothing for symlinks.

    Raises OSError if any of the libacl calls fails.
    """
    cdef acl_t access_acl = NULL
    cdef acl_t default_acl = NULL
    # errno is copied into this right after a failing C call, so that building
    # the exception (which runs other code) cannot change the reported error.
    cdef int error_number = 0

    if stat.S_ISLNK(item.get('mode', 0)):
        # Linux does not support setting ACLs on symlinks
        return

    # Always encode: even when an fd is given, the default ACL below is set
    # by path, and the C call needs bytes, not str.
    if isinstance(path, str):
        path = os.fsencode(path)
    if numeric_ids:
        converter = posix_acl_use_stored_uid_gid
    else:
        converter = acl_use_local_uid_gid
    access_text = item.get('acl_access')
    if access_text is not None:
        try:
            access_acl = acl_from_text(<bytes>converter(access_text))
            if access_acl == NULL:
                error_number = errno.errno
                raise OSError(error_number, os.strerror(error_number), os.fsdecode(path))
            if fd is not None:
                if acl_set_fd(fd, access_acl) == -1:
                    error_number = errno.errno
                    raise OSError(error_number, os.strerror(error_number), os.fsdecode(path))
            else:
                if acl_set_file(path, ACL_TYPE_ACCESS, access_acl) == -1:
                    error_number = errno.errno
                    raise OSError(error_number, os.strerror(error_number), os.fsdecode(path))
        finally:
            acl_free(access_acl)
    default_text = item.get('acl_default')
    if default_text is not None:
        try:
            default_acl = acl_from_text(<bytes>converter(default_text))
            if default_acl == NULL:
                error_number = errno.errno
                raise OSError(error_number, os.strerror(error_number), os.fsdecode(path))
            # only directories can get a default ACL. there is no fd-based api to set it.
            if acl_set_file(path, ACL_TYPE_DEFAULT, default_acl) == -1:
                error_number = errno.errno
                raise OSError(error_number, os.strerror(error_number), os.fsdecode(path))
        finally:
            acl_free(default_acl)
|
Improve LoggedIO write performance, make commit mechanism more solid
- Instead of very small (5 MB-ish) segment files, use larger ones
- Request asynchronous write-out or write-through (TODO) where it is supported,
to achieve a continuously high throughput for writes
- Instead of depending on ordered writes (write data, commit tag, sync)
for consistency, do a double-sync commit as more serious RDBMS also do
i.e. write data, sync, write commit tag, sync
Since commits are very expensive in Borg at the moment this makes no
difference performance-wise.
New platform APIs: SyncFile, sync_dir
[x] Naive implementation (equivalent to what Borg did before)
[x] Linux implementation
[ ] Windows implementation
[-] OSX implementation (F_FULLSYNC)
2016-05-14 20:46:41 +00:00
|
|
|
|
2018-07-05 20:05:15 +00:00
|
|
|
|
Improve LoggedIO write performance, make commit mechanism more solid
- Instead of very small (5 MB-ish) segment files, use larger ones
- Request asynchronous write-out or write-through (TODO) where it is supported,
to achieve a continuously high throughput for writes
- Instead of depending on ordered writes (write data, commit tag, sync)
for consistency, do a double-sync commit as more serious RDBMS also do
i.e. write data, sync, write commit tag, sync
Since commits are very expensive in Borg at the moment this makes no
difference performance-wise.
New platform APIs: SyncFile, sync_dir
[x] Naive implementation (equivalent to what Borg did before)
[x] Linux implementation
[ ] Windows implementation
[-] OSX implementation (F_FULLSYNC)
2016-05-14 20:46:41 +00:00
|
|
|
cdef _sync_file_range(fd, offset, length, flags):
    """Call sync_file_range on a page-aligned range, then advise DONTNEED for it.

    fd: file descriptor to operate on.
    offset, length: byte range; both must be multiples of the page size
                    (checked with assertions).
    flags: SYNC_FILE_RANGE_* flags passed through to sync_file_range.

    Raises OSError if sync_file_range returns a non-zero value.
    """
    cdef int error_number = 0

    assert (offset & PAGE_MASK) == 0, "offset %d not page-aligned" % offset
    assert (length & PAGE_MASK) == 0, "length %d not page-aligned" % length
    if sync_file_range(fd, offset, length, flags) != 0:
        # read errno once, before os.strerror and the exception constructor run
        error_number = errno.errno
        raise OSError(error_number, os.strerror(error_number))
    safe_fadvise(fd, offset, length, 'DONTNEED')
|
Improve LoggedIO write performance, make commit mechanism more solid
- Instead of very small (5 MB-ish) segment files, use larger ones
- Request asynchronous write-out or write-through (TODO) where it is supported,
to achieve a continuously high throughput for writes
- Instead of depending on ordered writes (write data, commit tag, sync)
for consistency, do a double-sync commit as more serious RDBMS also do
i.e. write data, sync, write commit tag, sync
Since commits are very expensive in Borg at the moment this makes no
difference performance-wise.
New platform APIs: SyncFile, sync_dir
[x] Naive implementation (equivalent to what Borg did before)
[x] Linux implementation
[ ] Windows implementation
[-] OSX implementation (F_FULLSYNC)
2016-05-14 20:46:41 +00:00
|
|
|
|
|
|
|
|
2019-02-05 00:30:54 +00:00
|
|
|
# System page size minus one.  `x & PAGE_MASK == 0` is used to check page alignment and
# `x & ~PAGE_MASK` to round x down to a page boundary.
# NOTE(review): this only works as a bit mask if the page size is a power of two — confirm.
cdef unsigned PAGE_MASK = sysconf(_SC_PAGESIZE) - 1
|
Improve LoggedIO write performance, make commit mechanism more solid
- Instead of very small (5 MB-ish) segment files, use larger ones
- Request asynchronous write-out or write-through (TODO) where it is supported,
to achieve a continuously high throughput for writes
- Instead of depending on ordered writes (write data, commit tag, sync)
for consistency, do a double-sync commit as more serious RDBMS also do
i.e. write data, sync, write commit tag, sync
Since commits are very expensive in Borg at the moment this makes no
difference performance-wise.
New platform APIs: SyncFile, sync_dir
[x] Naive implementation (equivalent to what Borg did before)
[x] Linux implementation
[ ] Windows implementation
[-] OSX implementation (F_FULLSYNC)
2016-05-14 20:46:41 +00:00
|
|
|
|
|
|
|
|
2020-01-02 17:54:00 +00:00
|
|
|
if 'basesyncfile' not in workarounds and SYNC_FILE_RANGE_LOADED:
    # a real Linux, so we can do better. :)
    class SyncFile(BaseSyncFile):
        """
        Implemented using sync_file_range for asynchronous write-out and fdatasync for actual durability.

        "write-out" means that dirty pages (= data that was written) are submitted to an I/O queue and will be
        sent to disk in the immediate future.
        """

        def __init__(self, path, *, fd=None, binary=False):
            """Set up the base file object and the write-out bookkeeping."""
            super().__init__(path, fd=fd, binary=binary)
            # number of bytes written so far
            self.offset = 0
            # amount of newly written data (16 MiB, rounded down to a page boundary) that triggers a write-out
            self.write_window = (16 * 1024 ** 2) & ~PAGE_MASK
            # page-aligned offset up to which write-out was already requested
            self.last_sync = 0
            # start of the previously requested range that was not waited for yet (None: there is none)
            self.pending_sync = None

        def write(self, data):
            """Write *data*; once a full write window has piled up, request its write-out."""
            self.offset += self.f.write(data)
            aligned = self.offset & ~PAGE_MASK
            if aligned < self.last_sync + self.write_window:
                # not enough new data yet
                return
            self.f.flush()
            # request write-out of the new range without waiting for it
            _sync_file_range(self.fd, self.last_sync, aligned - self.last_sync, SYNC_FILE_RANGE_WRITE)
            if self.pending_sync is not None:
                # now wait for the range that was requested in the previous round
                wait_flags = SYNC_FILE_RANGE_WRITE | SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WAIT_AFTER
                _sync_file_range(self.fd, self.pending_sync, self.last_sync - self.pending_sync, wait_flags)
            self.pending_sync, self.last_sync = self.last_sync, aligned

        def sync(self):
            """Flush buffered data, fdatasync the file, then advise DONTNEED (offset 0, length 0)."""
            self.f.flush()
            os.fdatasync(self.fd)
            # tell the OS that it does not need to cache what we just wrote,
            # avoids spoiling the cache for the OS and other processes.
            safe_fadvise(self.fd, 0, 0, 'DONTNEED')
else:
    class SyncFile(BaseSyncFile):
        # if we are on platforms with a broken or not implemented sync_file_range,
        # use the more generic BaseSyncFile to avoid issues.
        # see basesyncfile description in our docs for details.
        pass
|