
FilesystemObjectProcessors.process_file: clean up orphaned chunks in case of exceptions

Note: no changes inside the indented block,
just added the try and the except block.
Thomas Waldmann 2023-02-12 20:04:08 +01:00
parent f1981715e4
commit d0c61bbbf1
No known key found for this signature in database
GPG key ID: 243ACFA951F78E01


@@ -1451,76 +1451,90 @@ class FilesystemObjectProcessors:
                # so it can be extracted / accessed in FUSE mount like a regular file.
                # this needs to be done early, so that part files also get the patched mode.
                item.mode = stat.S_IFREG | stat.S_IMODE(item.mode)
            # we begin processing chunks now (writing or incref'ing them to the repository),
            # which might require cleanup (see except-branch):
            try:
                if hl_chunks is not None:  # create_helper gave us chunks from a previous hardlink
                    item.chunks = []
                    for chunk_id, chunk_size in hl_chunks:
                        # process one-by-one, so we will know in item.chunks how far we got
                        chunk_entry = cache.chunk_incref(chunk_id, self.stats)
                        item.chunks.append(chunk_entry)
                else:  # normal case, no "2nd+" hardlink
                    if not is_special_file:
                        hashed_path = safe_encode(os.path.join(self.cwd, path))
                        started_hashing = time.monotonic()
                        path_hash = self.key.id_hash(hashed_path)
                        self.stats.hashing_time += time.monotonic() - started_hashing
                        known, ids = cache.file_known_and_unchanged(hashed_path, path_hash, st)
                    else:
                        # in --read-special mode, we may be called for special files.
                        # there should be no information in the cache about special files processed in
                        # read-special mode, but we better play safe as this was wrong in the past:
                        hashed_path = path_hash = None
                        known, ids = False, None
                    if ids is not None:
                        # Make sure all ids are available
                        for id_ in ids:
                            if not cache.seen_chunk(id_):
                                # cache said it is unmodified, but we lost a chunk: process file like modified
                                status = "M"
                                break
                        else:
                            item.chunks = []
                            for chunk_id in ids:
                                # process one-by-one, so we will know in item.chunks how far we got
                                chunk_entry = cache.chunk_incref(chunk_id, self.stats)
                                item.chunks.append(chunk_entry)
                            status = "U"  # regular file, unchanged
                    else:
                        status = "M" if known else "A"  # regular file, modified or added
                    self.print_file_status(status, path)
                    self.stats.files_stats[status] += 1
                    status = None  # we already printed the status
                    # Only chunkify the file if needed
                    if "chunks" not in item:
                        with backup_io("read"):
                            self.process_file_chunks(
                                item,
                                cache,
                                self.stats,
                                self.show_progress,
                                backup_io_iter(self.chunker.chunkify(None, fd)),
                            )
                        self.stats.chunking_time = self.chunker.chunking_time
                    if is_win32:
                        changed_while_backup = False  # TODO
                    else:
                        with backup_io("fstat2"):
                            st2 = os.fstat(fd)
                        # special files:
                        # - fifos change naturally, because they are fed from the other side. no problem.
                        # - blk/chr devices don't change ctime anyway.
                        changed_while_backup = not is_special_file and st.st_ctime_ns != st2.st_ctime_ns
                    if changed_while_backup:
                        # regular file changed while we backed it up, might be inconsistent/corrupt!
                        status = "C"
                    if not is_special_file and not changed_while_backup:
                        # we must not memorize special files, because the contents of e.g. a
                        # block or char device will change without its mtime/size/inode changing.
                        # also, we must not memorize a potentially inconsistent/corrupt file that
                        # changed while we backed it up.
                        cache.memorize_file(hashed_path, path_hash, st, [c.id for c in item.chunks])
                self.stats.nfiles += 1
                item.update(self.metadata_collector.stat_ext_attrs(st, path, fd=fd))
                item.get_size(memorize=True)
                return status
            except BackupOSError:
                # Something went wrong and we might need to clean up a bit.
                # Maybe we have already incref'ed some file content chunks in the repo -
                # but we will not add an item (see add_item in create_helper) and thus
                # they would be orphaned chunks in case that we commit the transaction.
                for chunk in item.get("chunks", []):
                    cache.chunk_decref(chunk.id, self.stats, wait=False)
                # Now that we have cleaned up the chunk references, we can re-raise the exception.
                # This will skip processing of this file, but might retry or continue with the next one.
                raise


class TarfileObjectProcessors:
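
For readers not familiar with borg's chunk cache, here is a minimal, self-contained sketch of the cleanup pattern this commit introduces: references taken with incref are rolled back with decref if reading the file fails partway through. The ChunkCache class and store_file_chunks helper below are hypothetical illustrations for this page only, not borg's real API.

class ChunkCache:
    """Toy stand-in for borg's chunk cache: it only tracks reference counts."""

    def __init__(self):
        self.refcounts = {}

    def chunk_incref(self, chunk_id):
        # take one more reference on a chunk that is (or will be) in the repo
        self.refcounts[chunk_id] = self.refcounts.get(chunk_id, 0) + 1
        return chunk_id

    def chunk_decref(self, chunk_id):
        # drop a reference; when the count reaches 0 the chunk is no longer needed
        self.refcounts[chunk_id] -= 1
        if self.refcounts[chunk_id] == 0:
            del self.refcounts[chunk_id]


def store_file_chunks(cache, chunk_ids, read_chunk):
    """Incref chunks one by one; on any OSError, undo the increfs so nothing is orphaned."""
    stored = []
    try:
        for chunk_id in chunk_ids:
            read_chunk(chunk_id)  # may raise OSError, e.g. the file vanished mid-backup
            stored.append(cache.chunk_incref(chunk_id))
        return stored
    except OSError:
        # roll back the references we already took, then let the caller decide
        # whether to retry this file or continue with the next one
        for chunk_id in stored:
            cache.chunk_decref(chunk_id)
        raise

If read_chunk raises partway through, the refcounts taken so far are dropped again before the exception propagates, which mirrors what the except BackupOSError branch above does with cache.chunk_decref for the chunks already recorded in item.chunks.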