
fix race condition with data loss potential, fixes #3536

we discard all files cache entries referring to files
with timestamps AFTER we started the backup.

so, even if we backed up an inconsistent file that was
changed while we read it, we would not have a files cache
entry for it and would fully read/chunk/hash it again in
the next backup.
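
As a minimal sketch of the new keep/discard rule (simplified and hypothetical: keep_entry, entry_ts_ns and safety_margin_ns are made-up names for this illustration, not borg's actual API):

def keep_entry(age, entry_ts_ns, newest_cmtime_ns, start_backup_ns, safety_margin_ns, ttl):
    """Decide whether a files cache entry may be persisted."""
    # drop everything whose ctime/mtime is at or after this point in time:
    discard_after = min(newest_cmtime_ns, start_backup_ns - safety_margin_ns)
    if age == 0:
        # entry seen in this backup: only trust it if the file's timestamps
        # clearly predate the start of the backup
        return entry_ts_ns < discard_after
    # entry carried over from an older backup: keep it until it reaches the TTL
    return age < ttl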
Thomas Waldmann, 2024-09-20 22:23:43 +02:00
parent b60378cf0e
commit ec9d412756
GPG key ID: 243ACFA951F78E01
5 changed files with 44 additions and 22 deletions


@@ -125,6 +125,7 @@ class Archiver(
     def __init__(self, lock_wait=None, prog=None):
         self.lock_wait = lock_wait
         self.prog = prog
+        self.start_backup = None
 
     def print_warning(self, msg, *args, **kw):
         warning_code = kw.get("wc", EXIT_WARNING)  # note: wc=None can be used to not influence exit code


@@ -168,6 +168,7 @@ def wrapper(self, args, **kwargs):
                 manifest_,
                 progress=getattr(args, "progress", False),
                 cache_mode=getattr(args, "files_cache_mode", FILES_CACHE_MODE_DISABLED),
+                start_backup=getattr(self, "start_backup", None),
                 iec=getattr(args, "iec", False),
             ) as cache_:
                 return method(self, args, repository=repository, cache=cache_, **kwargs)


@@ -214,6 +214,7 @@ def create_inner(archive, cache, fso):
         self.noxattrs = args.noxattrs
         self.exclude_nodump = args.exclude_nodump
         dry_run = args.dry_run
+        self.start_backup = time.time_ns()
         t0 = archive_ts_now()
         t0_monotonic = time.monotonic()
         logger.info('Creating archive at "%s"' % args.location.processed)


@@ -3,6 +3,7 @@
 import shutil
 import stat
 from collections import namedtuple
+from datetime import datetime, timezone
 from time import perf_counter
 
 from .logger import create_logger
@@ -11,7 +12,7 @@
 files_cache_logger = create_logger("borg.debug.files_cache")
 
-from .constants import CACHE_README, FILES_CACHE_MODE_DISABLED, ROBJ_FILE_STREAM
+from .constants import CACHE_README, FILES_CACHE_MODE_DISABLED, ROBJ_FILE_STREAM, TIME_DIFFERS2_NS
 from .checksums import xxh64
 from .hashindex import ChunkIndex, ChunkIndexEntry
 from .helpers import Error
@@ -343,6 +344,7 @@ def __new__(
         cache_mode=FILES_CACHE_MODE_DISABLED,
         iec=False,
         archive_name=None,
+        start_backup=None,
     ):
         return AdHocWithFilesCache(
             manifest=manifest,
@@ -352,6 +354,7 @@ def __new__(
             iec=iec,
             cache_mode=cache_mode,
             archive_name=archive_name,
+            start_backup=start_backup,
         )
@@ -367,7 +370,7 @@ class FilesCacheMixin:
     FILES_CACHE_NAME = "files"
 
-    def __init__(self, cache_mode, archive_name=None):
+    def __init__(self, cache_mode, archive_name=None, start_backup=None):
         self.archive_name = archive_name  # ideally a SERIES name
         assert not ("c" in cache_mode and "m" in cache_mode)
         assert "d" in cache_mode or "c" in cache_mode or "m" in cache_mode
@@ -375,6 +378,7 @@ def __init__(self, cache_mode, archive_name=None):
         self._files = None
         self._newest_cmtime = 0
         self._newest_path_hashes = set()
+        self.start_backup = start_backup
 
     @property
     def files(self):
@@ -490,32 +494,43 @@ def _read_files_cache(self):
 
     def _write_files_cache(self, files):
         """write files cache to cache directory"""
-        if self._newest_cmtime is None:
-            # was never set because no files were modified/added
-            self._newest_cmtime = 2**63 - 1  # nanoseconds, good until y2262
+        max_time_ns = 2**63 - 1  # nanoseconds, good until y2262
+        # self._newest_cmtime might be None if it was never set because no files were modified/added.
+        newest_cmtime = self._newest_cmtime if self._newest_cmtime is not None else max_time_ns
+        start_backup_time = self.start_backup - TIME_DIFFERS2_NS if self.start_backup is not None else max_time_ns
+        # we don't want to persist files cache entries of potentially problematic files:
+        discard_after = min(newest_cmtime, start_backup_time)
         ttl = int(os.environ.get("BORG_FILES_CACHE_TTL", 2))
         files_cache_logger.debug("FILES-CACHE-SAVE: starting...")
         # TODO: use something like SaveFile here, but that didn't work due to SyncFile missing .seek().
         with IntegrityCheckedFile(path=os.path.join(self.path, self.files_cache_name()), write=True) as fd:
-            entry_count = 0
+            entries = 0
+            age_discarded = 0
+            race_discarded = 0
             for path_hash, item in files.items():
-                # Only keep files seen in this backup that are older than newest cmtime seen in this backup -
-                # this is to avoid issues with filesystem snapshots and cmtime granularity.
-                # Also keep files from older backups that have not reached BORG_FILES_CACHE_TTL yet.
                 entry = FileCacheEntry(*msgpack.unpackb(item))
-                if (
-                    entry.age == 0
-                    and max(timestamp_to_int(entry.ctime), timestamp_to_int(entry.mtime)) < self._newest_cmtime
-                    or entry.age > 0
-                    and entry.age < ttl
-                ):
-                    msgpack.pack((path_hash, entry), fd)
-                    entry_count += 1
-        files_cache_logger.debug("FILES-CACHE-KILL: removed all old entries with age >= TTL [%d]", ttl)
-        files_cache_logger.debug(
-            "FILES-CACHE-KILL: removed all current entries with newest cmtime %d", self._newest_cmtime
-        )
-        files_cache_logger.debug("FILES-CACHE-SAVE: finished, %d remaining entries saved.", entry_count)
+                if entry.age == 0:  # current entries
+                    if max(timestamp_to_int(entry.ctime), timestamp_to_int(entry.mtime)) < discard_after:
+                        # Only keep files seen in this backup that are old enough not to suffer race conditions
+                        # relating to filesystem snapshots and ctime/mtime granularity or being modified while we read them.
+                        keep = True
+                    else:
+                        keep = False
+                        race_discarded += 1
+                else:  # old entries
+                    if entry.age < ttl:
+                        # Also keep files from older backups that have not reached BORG_FILES_CACHE_TTL yet.
+                        keep = True
+                    else:
+                        keep = False
+                        age_discarded += 1
+                if keep:
+                    msgpack.pack((path_hash, entry), fd)
+                    entries += 1
+        files_cache_logger.debug(f"FILES-CACHE-KILL: removed {age_discarded} entries with age >= TTL [{ttl}]")
+        t_str = datetime.fromtimestamp(discard_after / 1e9, timezone.utc).isoformat()
+        files_cache_logger.debug(f"FILES-CACHE-KILL: removed {race_discarded} entries with ctime/mtime >= {t_str}")
+        files_cache_logger.debug(f"FILES-CACHE-SAVE: finished, {entries} remaining entries saved.")
         return fd.integrity_data
 
     def file_known_and_unchanged(self, hashed_path, path_hash, st):
@@ -710,12 +725,13 @@ def __init__(
         cache_mode=FILES_CACHE_MODE_DISABLED,
         iec=False,
         archive_name=None,
+        start_backup=None,
     ):
         """
         :param warn_if_unencrypted: print warning if accessing unknown unencrypted repository
         :param cache_mode: what shall be compared in the file stat infos vs. cached stat infos comparison
         """
-        FilesCacheMixin.__init__(self, cache_mode, archive_name)
+        FilesCacheMixin.__init__(self, cache_mode, archive_name, start_backup)
         ChunksMixin.__init__(self)
         assert isinstance(manifest, Manifest)
         self.manifest = manifest
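
To make the effect of the new cutoff concrete, a hypothetical timeline (illustrative numbers only, not part of the commit): with the 3s TIME_DIFFERS2_NS margin, a file changed 5s into the backup can never keep its files cache entry.

import time

TIME_DIFFERS2_NS = 3_000_000_000  # 3s, see the constants hunk below

start_backup = time.time_ns()                  # taken when borg create starts
newest_cmtime = start_backup + 5_000_000_000   # a file changed 5s into the backup
start_backup_time = start_backup - TIME_DIFFERS2_NS
discard_after = min(newest_cmtime, start_backup_time)  # == start_backup - 3s

# the changed file's ctime/mtime (start_backup + 5s) is not < discard_after,
# so its entry is discarded and the file is fully re-read in the next backup
assert not (newest_cmtime < discard_after)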


@@ -117,6 +117,9 @@
 # we can't go much higher here (like e.g. to 2s) without causing issues.
 TIME_DIFFERS1_NS = 20000000
 
+# similar to above, but for bigger granularity / clock differences
+TIME_DIFFERS2_NS = 3000000000
+
 # return codes returned by borg command
 EXIT_SUCCESS = 0  # everything done, no problems
 EXIT_WARNING = 1  # reached normal end of operation, but there were issues (generic warning)