1
0
Fork 0
mirror of https://github.com/borgbackup/borg.git synced 2025-03-13 07:33:47 +00:00

SaveFile: fix race conditions

Thanks to Andrey Bienkowski (@hexagonrecursion) for reporting this and writing reproducer code.

Changes:
- use different, randomly (but recognizably) named temp files while writing (securely made by os.mkstemp())
- make sure temp files are cleaned up in normal and error conditions
- SyncFile can now get corresponding pair of path + open os-level fd
- cleaned up: fd now means os-level fd, f means python-file-like object
- fixed a caller of SaveFile
This commit is contained in:
Thomas Waldmann 2022-02-16 23:13:24 +01:00
parent da763cedda
commit 14b5c005d8
3 changed files with 57 additions and 45 deletions

View file

@ -100,14 +100,8 @@ def get_cache_dir():
# http://www.bford.info/cachedir/spec.html
""").encode('ascii')
from ..platform import SaveFile
try:
with SaveFile(cache_tag_fn, binary=True) as fd:
fd.write(cache_tag_contents)
except FileExistsError:
# if we have multiple SaveFile calls running in parallel for same cache_tag_fn,
# it is fine if just one (usually first/quicker one) of them run gets through
# and all others raise FileExistsError.
pass
with SaveFile(cache_tag_fn, binary=True) as fd:
fd.write(cache_tag_contents)
return cache_dir

View file

@ -1,6 +1,7 @@
import errno
import os
import socket
import tempfile
import uuid
from borg.helpers import safe_unlink
@ -147,10 +148,22 @@ class SyncFile:
TODO: A Windows implementation should use CreateFile with FILE_FLAG_WRITE_THROUGH.
"""
def __init__(self, path, binary=False):
def __init__(self, path, *, fd=None, binary=False):
"""
Open a SyncFile.
:param path: full path/filename
:param fd: additionally to path, it is possible to give an already open OS-level fd
that corresponds to path (like from os.open(path, ...) or os.mkstemp(...))
:param binary: whether to open in binary mode, default is False.
"""
mode = 'xb' if binary else 'x' # x -> raise FileExists exception in open() if file exists already
self.fd = open(path, mode)
self.fileno = self.fd.fileno()
self.path = path
if fd is None:
self.f = open(path, mode=mode) # python file object
else:
self.f = os.fdopen(fd, mode=mode)
self.fd = self.f.fileno() # OS-level fd
def __enter__(self):
return self
@ -159,7 +172,7 @@ class SyncFile:
self.close()
def write(self, data):
self.fd.write(data)
self.f.write(data)
def sync(self):
"""
@ -167,21 +180,21 @@ class SyncFile:
after sync().
"""
from .. import platform
self.fd.flush()
platform.fdatasync(self.fileno)
self.f.flush()
platform.fdatasync(self.fd)
# tell the OS that it does not need to cache what we just wrote,
# avoids spoiling the cache for the OS and other processes.
safe_fadvise(self.fileno, 0, 0, 'DONTNEED')
safe_fadvise(self.fd, 0, 0, 'DONTNEED')
def close(self):
"""sync() and close."""
from .. import platform
dirname = None
try:
dirname = os.path.dirname(self.fd.name)
dirname = os.path.dirname(self.path)
self.sync()
finally:
self.fd.close()
self.f.close()
if dirname:
platform.sync_dir(dirname)
@ -196,36 +209,41 @@ class SaveFile:
atomically and won't become corrupted, even on power failures or
crashes (for caveats see SyncFile).
Calling SaveFile(path) in parallel for same path is safe (even when using the same SUFFIX), but the
caller needs to catch potential FileExistsError exceptions that may happen in this racy situation.
The caller executing SaveFile->SyncFile->open() first will win.
All other callers will raise a FileExistsError in open(), at least until the os.replace is executed.
SaveFile can safely by used in parallel (e.g. by multiple processes) to write
to the same target path. Whatever writer finishes last (executes the os.replace
last) "wins" and has successfully written its content to the target path.
Internally used temporary files are created in the target directory and are
named <BASENAME>-<RANDOMCHARS>.tmp and cleaned up in normal and error conditions.
"""
SUFFIX = '.tmp'
def __init__(self, path, binary=False):
self.binary = binary
self.path = path
self.tmppath = self.path + self.SUFFIX
self.dir = os.path.dirname(path)
self.tmp_prefix = os.path.basename(path) + '-'
self.tmp_fd = None # OS-level fd
self.tmp_fname = None # full path/filename corresponding to self.tmp_fd
self.f = None # python-file-like SyncFile
def __enter__(self):
from .. import platform
try:
safe_unlink(self.tmppath)
except FileNotFoundError:
pass
self.fd = platform.SyncFile(self.tmppath, self.binary)
return self.fd
self.tmp_fd, self.tmp_fname = tempfile.mkstemp(prefix=self.tmp_prefix, suffix='.tmp', dir=self.dir)
self.f = platform.SyncFile(self.tmp_fname, fd=self.tmp_fd, binary=self.binary)
return self.f
def __exit__(self, exc_type, exc_val, exc_tb):
from .. import platform
self.fd.close()
self.f.close() # this indirectly also closes self.tmp_fd
self.tmp_fd = None
if exc_type is not None:
safe_unlink(self.tmppath)
return
os.replace(self.tmppath, self.path)
platform.sync_dir(os.path.dirname(self.path))
safe_unlink(self.tmp_fname) # with-body has failed, clean up tmp file
return # continue processing the exception normally
try:
os.replace(self.tmp_fname, self.path) # POSIX: atomic rename
except OSError:
safe_unlink(self.tmp_fname) # rename has failed, clean up tmp file
raise
finally:
platform.sync_dir(self.dir)
def swidth(s):

View file

@ -332,28 +332,28 @@ else:
disk in the immediate future.
"""
def __init__(self, path, binary=False):
super().__init__(path, binary)
def __init__(self, path, *, fd=None, binary=False):
super().__init__(path, fd=fd, binary=binary)
self.offset = 0
self.write_window = (16 * 1024 ** 2) & ~PAGE_MASK
self.last_sync = 0
self.pending_sync = None
def write(self, data):
self.offset += self.fd.write(data)
self.offset += self.f.write(data)
offset = self.offset & ~PAGE_MASK
if offset >= self.last_sync + self.write_window:
self.fd.flush()
_sync_file_range(self.fileno, self.last_sync, offset - self.last_sync, SYNC_FILE_RANGE_WRITE)
self.f.flush()
_sync_file_range(self.fd, self.last_sync, offset - self.last_sync, SYNC_FILE_RANGE_WRITE)
if self.pending_sync is not None:
_sync_file_range(self.fileno, self.pending_sync, self.last_sync - self.pending_sync,
_sync_file_range(self.fd, self.pending_sync, self.last_sync - self.pending_sync,
SYNC_FILE_RANGE_WRITE | SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WAIT_AFTER)
self.pending_sync = self.last_sync
self.last_sync = offset
def sync(self):
self.fd.flush()
os.fdatasync(self.fileno)
self.f.flush()
os.fdatasync(self.fd)
# tell the OS that it does not need to cache what we just wrote,
# avoids spoiling the cache for the OS and other processes.
safe_fadvise(self.fileno, 0, 0, 'DONTNEED')
safe_fadvise(self.fd, 0, 0, 'DONTNEED')