1
0
Fork 0
mirror of https://github.com/borgbackup/borg.git synced 2025-02-23 22:51:35 +00:00

use *at-style functions (e.g. openat, fstatat) to avoid races

Races caused by path components changing underneath us can be avoided by
opening the parent directory and using the parent_fd + file_name combination
with *at-style functions to access the directory's contents.
This commit is contained in:
Thomas Waldmann 2018-08-13 01:18:00 +02:00
parent ad5b9a1dfd
commit 833c49f834
3 changed files with 138 additions and 87 deletions

View file

@ -196,9 +196,9 @@ def backup_io_iter(iterator):
@contextmanager @contextmanager
def OsOpen(path, flags, noatime=False, op='open'): def OsOpen(*, flags, path=None, parent_fd=None, name=None, noatime=False, op='open'):
with backup_io(op): with backup_io(op):
fd = os_open(path, flags, noatime) fd = os_open(path=path, parent_fd=parent_fd, name=name, flags=flags, noatime=noatime)
try: try:
yield fd yield fd
finally: finally:
@ -1076,31 +1076,46 @@ def create_helper(self, path, st, status=None, hardlinkable=True):
if hardlink_master: if hardlink_master:
self.hard_links[(st.st_ino, st.st_dev)] = safe_path self.hard_links[(st.st_ino, st.st_dev)] = safe_path
def process_dir(self, *, path, fd, st):
    """Archive the directory *path* as a 'd' item and return its status.

    :param path: path of the directory (handed to create_helper and stat_attrs)
    :param fd: open file descriptor of the directory, handed to stat_attrs
    :param st: stat result of the directory
    :return: the status yielded by create_helper
    """
    # directories are requested as non-hardlinkable items
    helper = self.create_helper(path, st, 'd', hardlinkable=False)
    with helper as (item, status, hardlinked, hardlink_master):
        attrs = self.metadata_collector.stat_attrs(st, path, fd=fd)
        item.update(attrs)
        return status
def process_fifo(self, *, path, parent_fd, name, st):
    """Archive the fifo *path* as an 'f' item and return its status.

    The fifo is opened through OsOpen (which is handed path, parent_fd and name)
    and fstat()ed, so the archived metadata refers to the object that was
    actually opened rather than to whatever the caller stat()ed earlier.

    :param path: full (but not necessarily absolute) path of the fifo
    :param parent_fd: open file descriptor of the parent directory
    :param name: name of the fifo relative to parent_fd
    :param st: stat result obtained by the caller; replaced by a fresh fstat()
    :return: the status yielded by create_helper
    :raises BackupError: if the opened object is not a fifo
    """
    with self.create_helper(path, st, 'f') as (item, status, hardlinked, hardlink_master):  # fifo
        with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_normal, noatime=True) as fd:
            with backup_io('fstat'):
                curr_st = os.fstat(fd)
            # XXX do some more checks here: st vs. curr_st
            # Explicit check instead of assert: asserts are removed under "python -O"
            # and a type change between stat and open must not go unnoticed.
            if not stat.S_ISFIFO(curr_st.st_mode):
                raise BackupError('%s: file type changed (race condition), skipping file' % path)
            # make sure stats refer to same object that we are processing below
            st = curr_st
            item.update(self.metadata_collector.stat_attrs(st, path, fd=fd))
            return status
def process_dev(self, *, path, parent_fd, name, st, dev_type):
    """Archive the char/block device *path* and return its status.

    The device node is opened through OsOpen (which is handed path, parent_fd and
    name) and fstat()ed, so rdev and the other metadata refer to the object that
    was actually opened rather than to whatever the caller stat()ed earlier.

    :param path: full (but not necessarily absolute) path of the device node
    :param parent_fd: open file descriptor of the parent directory
    :param name: name of the device node relative to parent_fd
    :param st: stat result obtained by the caller; replaced by a fresh fstat()
    :param dev_type: status value handed to create_helper (callers pass 'c' or 'b')
    :return: the status yielded by create_helper
    :raises BackupError: if the opened object is neither a block nor a char device
    """
    with self.create_helper(path, st, dev_type) as (item, status, hardlinked, hardlink_master):  # char/block device
        with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_normal, noatime=True) as fd:
            with backup_io('fstat'):
                curr_st = os.fstat(fd)
            # XXX do some more checks here: st vs. curr_st
            # Explicit check instead of assert: asserts are removed under "python -O"
            # and a type change between stat and open must not go unnoticed.
            if not (stat.S_ISBLK(curr_st.st_mode) or stat.S_ISCHR(curr_st.st_mode)):
                raise BackupError('%s: file type changed (race condition), skipping file' % path)
            # make sure stats refer to same object that we are processing below
            st = curr_st
            item.rdev = st.st_rdev
            item.update(self.metadata_collector.stat_attrs(st, path, fd=fd))
            return status
def process_symlink(self, *, path, parent_fd, name, st):
    """Archive the symlink *path* as an 's' item and return its status.

    The link target is read relative to parent_fd when both parent_fd and name
    are given; otherwise the plain path is used and parent_fd is ignored.

    :param path: full (but not necessarily absolute) path of the symlink
    :param parent_fd: open file descriptor of the parent directory (or None)
    :param name: name of the symlink relative to parent_fd (or None)
    :param st: stat result of the symlink itself
    :return: the status yielded by create_helper
    """
    # note: using hardlinkable=False because we can not support hardlinked symlinks,
    #       due to the dual-use of item.source, see issue #2343:
    # hardlinked symlinks will be archived [and extracted] as non-hardlinked symlinks.
    with self.create_helper(path, st, 's', hardlinkable=False) as (item, status, hardlinked, hardlink_master):
        if name is not None and parent_fd is not None:
            fname, dir_fd = name, parent_fd
        else:
            # Falling back to the path: it must not be resolved relative to
            # parent_fd, or a relative path would be looked up in the wrong directory.
            fname, dir_fd = path, None
        with backup_io('readlink'):
            source = os.readlink(fname, dir_fd=dir_fd)
        item.source = source
        item.update(self.metadata_collector.stat_attrs(st, path))  # can't use FD here?
        return status
def process_stdin(self, *, path, cache): def process_stdin(self, *, path, cache):
@ -1120,9 +1135,9 @@ def process_stdin(self, *, path, cache):
self.add_item(item, stats=self.stats) self.add_item(item, stats=self.stats)
return 'i' # stdin return 'i' # stdin
def process_file(self, *, path, st, cache): def process_file(self, *, path, parent_fd, name, st, cache):
with self.create_helper(path, st, None) as (item, status, hardlinked, hardlink_master): # no status yet with self.create_helper(path, st, None) as (item, status, hardlinked, hardlink_master): # no status yet
with OsOpen(path, flags_normal, noatime=True) as fd: with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_normal, noatime=True) as fd:
with backup_io('fstat'): with backup_io('fstat'):
curr_st = os.fstat(fd) curr_st = os.fstat(fd)
# XXX do some checks here: st vs. curr_st # XXX do some checks here: st vs. curr_st
@ -1172,7 +1187,7 @@ def process_file(self, *, path, st, cache):
# we processed a special file like a regular file. reflect that in mode, # we processed a special file like a regular file. reflect that in mode,
# so it can be extracted / accessed in FUSE mount like a regular file: # so it can be extracted / accessed in FUSE mount like a regular file:
item.mode = stat.S_IFREG | stat.S_IMODE(item.mode) item.mode = stat.S_IFREG | stat.S_IMODE(item.mode)
return status return status
def valid_msgpacked_dict(d, keys_serialized): def valid_msgpacked_dict(d, keys_serialized):

View file

@ -34,7 +34,7 @@
from . import helpers from . import helpers
from .algorithms.checksums import crc32 from .algorithms.checksums import crc32
from .archive import Archive, ArchiveChecker, ArchiveRecreater, Statistics, is_special from .archive import Archive, ArchiveChecker, ArchiveRecreater, Statistics, is_special
from .archive import BackupError, BackupOSError, backup_io from .archive import BackupError, BackupOSError, backup_io, OsOpen
from .archive import FilesystemObjectProcessors, MetadataCollector, ChunksProcessor from .archive import FilesystemObjectProcessors, MetadataCollector, ChunksProcessor
from .cache import Cache, assert_secure, SecurityManager from .cache import Cache, assert_secure, SecurityManager
from .constants import * # NOQA from .constants import * # NOQA
@ -66,6 +66,7 @@
from .helpers import popen_with_error_handling, prepare_subprocess_env from .helpers import popen_with_error_handling, prepare_subprocess_env
from .helpers import dash_open from .helpers import dash_open
from .helpers import umount from .helpers import umount
from .helpers import flags_root, flags_dir
from .helpers import msgpack from .helpers import msgpack
from .nanorst import rst_to_terminal from .nanorst import rst_to_terminal
from .patterns import ArgparsePatternAction, ArgparseExcludeFileAction, ArgparsePatternFileAction, parse_exclude_pattern from .patterns import ArgparsePatternAction, ArgparseExcludeFileAction, ArgparsePatternFileAction, parse_exclude_pattern
@ -479,20 +480,23 @@ def create_inner(archive, cache, fso):
self.print_file_status(status, path) self.print_file_status(status, path)
continue continue
path = os.path.normpath(path) path = os.path.normpath(path)
try: parent_dir = os.path.dirname(path) or '.'
st = os.stat(path, follow_symlinks=False) name = os.path.basename(path)
except OSError as e: with OsOpen(path=parent_dir, flags=flags_root, noatime=True, op='open_root') as parent_fd:
self.print_warning('%s: %s', path, e) try:
continue st = os.stat(name, dir_fd=parent_fd, follow_symlinks=False)
if args.one_file_system: except OSError as e:
restrict_dev = st.st_dev self.print_warning('%s: %s', path, e)
else: continue
restrict_dev = None if args.one_file_system:
self._process(path=path, restrict_dev = st.st_dev
fso=fso, cache=cache, matcher=matcher, else:
exclude_caches=args.exclude_caches, exclude_if_present=args.exclude_if_present, restrict_dev = None
keep_exclude_tags=args.keep_exclude_tags, skip_inodes=skip_inodes, self._process(path=path, parent_fd=parent_fd, name=name,
restrict_dev=restrict_dev, read_special=args.read_special, dry_run=dry_run) fso=fso, cache=cache, matcher=matcher,
exclude_caches=args.exclude_caches, exclude_if_present=args.exclude_if_present,
keep_exclude_tags=args.keep_exclude_tags, skip_inodes=skip_inodes,
restrict_dev=restrict_dev, read_special=args.read_special, dry_run=dry_run)
if not dry_run: if not dry_run:
archive.save(comment=args.comment, timestamp=args.timestamp) archive.save(comment=args.comment, timestamp=args.timestamp)
if args.progress: if args.progress:
@ -544,12 +548,12 @@ def create_inner(archive, cache, fso):
create_inner(None, None, None) create_inner(None, None, None)
return self.exit_code return self.exit_code
def _process(self, *, path, def _process(self, *, path, parent_fd=None, name=None,
fso, cache, matcher, fso, cache, matcher,
exclude_caches, exclude_if_present, keep_exclude_tags, skip_inodes, exclude_caches, exclude_if_present, keep_exclude_tags, skip_inodes,
restrict_dev, read_special=False, dry_run=False): restrict_dev, read_special=False, dry_run=False):
""" """
Process *path* recursively according to the various parameters. Process *path* (or, preferably, parent_fd/name) recursively according to the various parameters.
This should only raise on critical errors. Per-item errors must be handled within this method. This should only raise on critical errors. Per-item errors must be handled within this method.
""" """
@ -557,7 +561,7 @@ def _process(self, *, path,
recurse_excluded_dir = False recurse_excluded_dir = False
if matcher.match(path): if matcher.match(path):
with backup_io('stat'): with backup_io('stat'):
st = os.stat(path, follow_symlinks=False) st = os.stat(name, dir_fd=parent_fd, follow_symlinks=False)
else: else:
self.print_file_status('x', path) self.print_file_status('x', path)
# get out here as quickly as possible: # get out here as quickly as possible:
@ -567,7 +571,7 @@ def _process(self, *, path,
if not matcher.recurse_dir: if not matcher.recurse_dir:
return return
with backup_io('stat'): with backup_io('stat'):
st = os.stat(path, follow_symlinks=False) st = os.stat(name, dir_fd=parent_fd, follow_symlinks=False)
recurse_excluded_dir = stat.S_ISDIR(st.st_mode) recurse_excluded_dir = stat.S_ISDIR(st.st_mode)
if not recurse_excluded_dir: if not recurse_excluded_dir:
return return
@ -582,75 +586,85 @@ def _process(self, *, path,
if self.exclude_nodump: if self.exclude_nodump:
# Ignore if nodump flag is set # Ignore if nodump flag is set
with backup_io('flags'): with backup_io('flags'):
if get_flags(path, st) & stat.UF_NODUMP: if get_flags(path=path, st=st) & stat.UF_NODUMP:
self.print_file_status('x', path) self.print_file_status('x', path)
return return
if stat.S_ISREG(st.st_mode): if stat.S_ISREG(st.st_mode):
if not dry_run: if not dry_run:
status = fso.process_file(path=path, st=st, cache=cache) status = fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st, cache=cache)
elif stat.S_ISDIR(st.st_mode): elif stat.S_ISDIR(st.st_mode):
if recurse: with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_dir,
tag_paths = dir_is_tagged(path, exclude_caches, exclude_if_present) noatime=True, op='dir_open') as child_fd:
if tag_paths: with backup_io('fstat'):
# if we are already recursing in an excluded dir, we do not need to do anything else than curr_st = os.fstat(child_fd)
# returning (we do not need to archive or recurse into tagged directories), see #3991: # XXX do some checks here: st vs. curr_st
assert stat.S_ISDIR(curr_st.st_mode)
# make sure stats refer to same object that we are processing below
st = curr_st
if recurse:
tag_names = dir_is_tagged(path, exclude_caches, exclude_if_present)
if tag_names:
# if we are already recursing in an excluded dir, we do not need to do anything else than
# returning (we do not need to archive or recurse into tagged directories), see #3991:
if not recurse_excluded_dir:
if keep_exclude_tags and not dry_run:
fso.process_dir(path=path, fd=child_fd, st=st)
for tag_name in tag_names:
tag_path = os.path.join(path, tag_name)
self._process(path=tag_path, parent_fd=child_fd, name=tag_name,
fso=fso, cache=cache, matcher=matcher,
exclude_caches=exclude_caches, exclude_if_present=exclude_if_present,
keep_exclude_tags=keep_exclude_tags, skip_inodes=skip_inodes,
restrict_dev=restrict_dev, read_special=read_special, dry_run=dry_run)
self.print_file_status('x', path)
return
if not dry_run:
if not recurse_excluded_dir: if not recurse_excluded_dir:
if keep_exclude_tags and not dry_run: status = fso.process_dir(path=path, fd=child_fd, st=st)
fso.process_dir(path=path, st=st) if recurse:
for tag_path in tag_paths: with backup_io('scandir'):
self._process(path=tag_path, entries = helpers.scandir_inorder(path=path, fd=child_fd)
fso=fso, cache=cache, matcher=matcher, for dirent in entries:
exclude_caches=exclude_caches, exclude_if_present=exclude_if_present, normpath = os.path.normpath(dirent.path)
keep_exclude_tags=keep_exclude_tags, skip_inodes=skip_inodes, self._process(path=normpath, parent_fd=child_fd, name=dirent.name,
restrict_dev=restrict_dev, read_special=read_special, dry_run=dry_run) fso=fso, cache=cache, matcher=matcher,
self.print_file_status('x', path) exclude_caches=exclude_caches, exclude_if_present=exclude_if_present,
return keep_exclude_tags=keep_exclude_tags, skip_inodes=skip_inodes,
if not dry_run: restrict_dev=restrict_dev, read_special=read_special, dry_run=dry_run)
if not recurse_excluded_dir:
status = fso.process_dir(path=path, st=st)
if recurse:
with backup_io('scandir'):
entries = helpers.scandir_inorder(path)
for dirent in entries:
normpath = os.path.normpath(dirent.path)
self._process(path=normpath,
fso=fso, cache=cache, matcher=matcher,
exclude_caches=exclude_caches, exclude_if_present=exclude_if_present,
keep_exclude_tags=keep_exclude_tags, skip_inodes=skip_inodes,
restrict_dev=restrict_dev, read_special=read_special, dry_run=dry_run)
elif stat.S_ISLNK(st.st_mode): elif stat.S_ISLNK(st.st_mode):
if not dry_run: if not dry_run:
if not read_special: if not read_special:
status = fso.process_symlink(path=path, st=st) status = fso.process_symlink(path=path, parent_fd=parent_fd, name=name, st=st)
else: else:
try: try:
st_target = os.stat(path) st_target = os.stat(name, dir_fd=parent_fd, follow_symlinks=True)
except OSError: except OSError:
special = False special = False
else: else:
special = is_special(st_target.st_mode) special = is_special(st_target.st_mode)
if special: if special:
status = fso.process_file(path=path, st=st_target, cache=cache) # XXX must FOLLOW symlinks!
status = fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st_target, cache=cache)
else: else:
status = fso.process_symlink(path=path, st=st) status = fso.process_symlink(path=path, parent_fd=parent_fd, name=name, st=st)
elif stat.S_ISFIFO(st.st_mode): elif stat.S_ISFIFO(st.st_mode):
if not dry_run: if not dry_run:
if not read_special: if not read_special:
status = fso.process_fifo(path=path, st=st) status = fso.process_fifo(path=path, parent_fd=parent_fd, name=name, st=st)
else: else:
status = fso.process_file(path=path, st=st, cache=cache) status = fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st, cache=cache)
elif stat.S_ISCHR(st.st_mode): elif stat.S_ISCHR(st.st_mode):
if not dry_run: if not dry_run:
if not read_special: if not read_special:
status = fso.process_dev(path=path, st=st, dev_type='c') status = fso.process_dev(path=path, parent_fd=parent_fd, name=name, st=st, dev_type='c')
else: else:
status = fso.process_file(path=path, st=st, cache=cache) status = fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st, cache=cache)
elif stat.S_ISBLK(st.st_mode): elif stat.S_ISBLK(st.st_mode):
if not dry_run: if not dry_run:
if not read_special: if not read_special:
status = fso.process_dev(path=path, st=st, dev_type='b') status = fso.process_dev(path=path, parent_fd=parent_fd, name=name, st=st, dev_type='b')
else: else:
status = fso.process_file(path=path, st=st, cache=cache) status = fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st, cache=cache)
elif stat.S_ISSOCK(st.st_mode): elif stat.S_ISSOCK(st.st_mode):
# Ignore unix sockets # Ignore unix sockets
return return

View file

@ -15,6 +15,9 @@
logger = create_logger() logger = create_logger()
py_37_plus = sys.version_info >= (3, 7)
def get_base_dir(): def get_base_dir():
"""Get home directory / base directory for borg: """Get home directory / base directory for borg:
@ -103,18 +106,19 @@ def dir_is_cachedir(path):
def dir_is_tagged(path, exclude_caches, exclude_if_present):
    """Report which exclusion tags are present in the directory *path*.

    :param path: directory to inspect
    :param exclude_caches: if true, check dir_is_cachedir(path) and report CACHE_TAG_NAME on a hit
    :param exclude_if_present: iterable of user-specified tag names, or None
    :return: list of the names (not paths) of the tag files/directories found,
             the cache tag first, then user tags in the order given
    """
    # TODO: do operations based on the directory fd
    found = [CACHE_TAG_NAME] if exclude_caches and dir_is_cachedir(path) else []
    if exclude_if_present is not None:
        found.extend(tag for tag in exclude_if_present
                     if os.path.exists(os.path.join(path, tag)))
    return found
_safe_re = re.compile(r'^((\.\.)?/+)+') _safe_re = re.compile(r'^((\.\.)?/+)+')
@ -144,8 +148,10 @@ def scandir_keyfunc(dirent):
return (1, dirent.name) return (1, dirent.name)
def scandir_inorder(*, path, fd=None):
    """Return the os.scandir() entries of a directory, sorted by scandir_keyfunc.

    :param path: directory path, used when no fd is given or fd scanning is unavailable
    :param fd: open directory file descriptor (optional)
    :return: sorted list of directory entries

    NOTE(review): the os.scandir documentation states that entries obtained by
    scanning a file descriptor have ``path`` equal to ``name`` - callers that
    need a full path should confirm how they build it.
    """
    # py37+ supports giving a fd instead of a path
    if fd is not None and py_37_plus:
        target = fd
    else:
        target = path
    entries = list(os.scandir(target))
    entries.sort(key=scandir_keyfunc)
    return entries
def secure_erase(path): def secure_erase(path):
@ -199,23 +205,39 @@ def O_(*flags):
flags_base = O_('BINARY', 'NONBLOCK', 'NOCTTY') # later: add 'NOFOLLOW' flags_base = O_('BINARY', 'NONBLOCK', 'NOCTTY') # later: add 'NOFOLLOW'
flags_normal = flags_base | O_('RDONLY') flags_normal = flags_base | O_('RDONLY')
flags_noatime = flags_normal | O_('NOATIME') flags_noatime = flags_normal | O_('NOATIME')
flags_root = O_('RDONLY')
flags_dir = O_('DIRECTORY', 'RDONLY', 'NOFOLLOW')
def os_open(*, flags, path=None, parent_fd=None, name=None, noatime=False):
    """
    Use os.open to open a fs item.

    If parent_fd and name are given, they are preferred and openat will be used,
    path is not used in this case. Otherwise path is opened directly and
    parent_fd is ignored, so a relative path is never resolved against parent_fd.

    :param path: full (but not necessarily absolute) path
    :param parent_fd: open directory file descriptor
    :param name: name relative to parent_fd
    :param flags: open flags for os.open() (int)
    :param noatime: True if access time shall be preserved
    :return: file descriptor
    :raises OSError: whatever os.open() raises; a PermissionError from the
                     O_NOATIME attempt is retried once without that flag
    """
    if name is not None and parent_fd is not None:
        fname, dir_fd = name, parent_fd
    else:
        # just use the path; handing parent_fd to os.open here would make a
        # relative path resolve against the wrong directory
        fname, dir_fd = path, None
    if not noatime:
        return os.open(fname, flags, dir_fd=dir_fd)
    flags_with_noatime = flags | O_('NOATIME')
    try:
        # if we have O_NOATIME, this likely will succeed if we are root or owner of file:
        return os.open(fname, flags_with_noatime, dir_fd=dir_fd)
    except PermissionError:
        if flags_with_noatime == flags:
            # we do not have O_NOATIME, no need to try again:
            raise
        # Was this EPERM due to the O_NOATIME flag? Try again without it:
        return os.open(fname, flags, dir_fd=dir_fd)