check --repair: fix potential data loss, fixes #5325

We have long been using the SaveFile context manager in other places.
By using it, the original segment file stays in place until its recovery
is completed (the recovered data is written and synced into *.tmp).
On successful completion, *.tmp is renamed over the original and the directory is synced.
If aborted by some exception, including Ctrl-C, the original file is unmodified.
This commit is contained in:
Thomas Waldmann 2020-09-06 11:57:21 +02:00
parent e70e49f47e
commit b198160257
1 changed files with 7 additions and 9 deletions

View File

@ -1486,23 +1486,21 @@ class LoggedIO:
logger.info('attempting to recover ' + filename)
if segment in self.fds:
del self.fds[segment]
backup_filename = filename + '.beforerecover'
os.rename(filename, backup_filename)
if os.path.getsize(backup_filename) < MAGIC_LEN + self.header_fmt.size:
if os.path.getsize(filename) < MAGIC_LEN + self.header_fmt.size:
# this is either a zero-byte file (which would crash mmap() below) or otherwise
# just too small to be a valid non-empty segment file, so do a shortcut here:
with open(filename, 'wb') as fd:
with SaveFile(filename, binary=True) as fd:
fd.write(MAGIC)
return
with open(backup_filename, 'rb') as backup_fd:
with open(filename, 'rb') as src_fd:
# note: file must not be 0 size or mmap() will crash.
with mmap.mmap(backup_fd.fileno(), 0, access=mmap.ACCESS_READ) as mm:
with mmap.mmap(src_fd.fileno(), 0, access=mmap.ACCESS_READ) as mm:
# memoryview context manager is problematic, see https://bugs.python.org/issue35686
data = memoryview(mm)
d = data
try:
with open(filename, 'wb') as fd:
fd.write(MAGIC)
with SaveFile(filename, binary=True) as dst_fd:
dst_fd.write(MAGIC)
while len(d) >= self.header_fmt.size:
crc, size, tag = self.header_fmt.unpack(d[:self.header_fmt.size])
if size < self.header_fmt.size or size > len(d):
@ -1511,7 +1509,7 @@ class LoggedIO:
if crc32(d[4:size]) & 0xffffffff != crc:
d = d[1:]
continue
fd.write(d[:size])
dst_fd.write(d[:size])
d = d[size:]
finally:
del d