mirror of
https://github.com/borgbackup/borg.git
synced 2025-02-22 14:11:27 +00:00
Merge pull request #2622 from enkore/issue/2557
repository: truncate segments before unlinking
This commit is contained in:
commit
fb26b3c728
3 changed files with 55 additions and 6 deletions
|
@ -1995,6 +1995,23 @@ def secure_erase(path):
|
||||||
os.unlink(path)
|
os.unlink(path)
|
||||||
|
|
||||||
|
|
||||||
|
def truncate_and_unlink(path):
|
||||||
|
"""
|
||||||
|
Truncate and then unlink *path*.
|
||||||
|
|
||||||
|
Do not create *path* if it does not exist.
|
||||||
|
Open *path* for truncation in r+b mode (=O_RDWR|O_BINARY).
|
||||||
|
|
||||||
|
Use this when deleting potentially large files when recovering
|
||||||
|
from a VFS error such as ENOSPC. It can help a full file system
|
||||||
|
recover. Refer to the "File system interaction" section
|
||||||
|
in repository.py for further explanations.
|
||||||
|
"""
|
||||||
|
with open(path, 'r+b') as fd:
|
||||||
|
fd.truncate()
|
||||||
|
os.unlink(path)
|
||||||
|
|
||||||
|
|
||||||
def popen_with_error_handling(cmd_line: str, log_prefix='', **kwargs):
|
def popen_with_error_handling(cmd_line: str, log_prefix='', **kwargs):
|
||||||
"""
|
"""
|
||||||
Handle typical errors raised by subprocess.Popen. Return None if an error occurred,
|
Handle typical errors raised by subprocess.Popen. Return None if an error occurred,
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
import errno
|
import errno
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
from borg.helpers import truncate_and_unlink
|
||||||
|
|
||||||
"""
|
"""
|
||||||
platform base module
|
platform base module
|
||||||
====================
|
====================
|
||||||
|
@ -157,7 +159,7 @@ def __init__(self, path, binary=False):
|
||||||
def __enter__(self):
|
def __enter__(self):
|
||||||
from .. import platform
|
from .. import platform
|
||||||
try:
|
try:
|
||||||
os.unlink(self.tmppath)
|
truncate_and_unlink(self.tmppath)
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
pass
|
pass
|
||||||
self.fd = platform.SyncFile(self.tmppath, self.binary)
|
self.fd = platform.SyncFile(self.tmppath, self.binary)
|
||||||
|
@ -167,7 +169,7 @@ def __exit__(self, exc_type, exc_val, exc_tb):
|
||||||
from .. import platform
|
from .. import platform
|
||||||
self.fd.close()
|
self.fd.close()
|
||||||
if exc_type is not None:
|
if exc_type is not None:
|
||||||
os.unlink(self.tmppath)
|
truncate_and_unlink(self.tmppath)
|
||||||
return
|
return
|
||||||
os.replace(self.tmppath, self.path)
|
os.replace(self.tmppath, self.path)
|
||||||
platform.sync_dir(os.path.dirname(self.path))
|
platform.sync_dir(os.path.dirname(self.path))
|
||||||
|
|
|
@ -18,7 +18,7 @@
|
||||||
from .helpers import ProgressIndicatorPercent
|
from .helpers import ProgressIndicatorPercent
|
||||||
from .helpers import bin_to_hex
|
from .helpers import bin_to_hex
|
||||||
from .helpers import hostname_is_unique
|
from .helpers import hostname_is_unique
|
||||||
from .helpers import secure_erase
|
from .helpers import secure_erase, truncate_and_unlink
|
||||||
from .locking import Lock, LockError, LockErrorT
|
from .locking import Lock, LockError, LockErrorT
|
||||||
from .logger import create_logger
|
from .logger import create_logger
|
||||||
from .lrucache import LRUCache
|
from .lrucache import LRUCache
|
||||||
|
@ -83,6 +83,30 @@ class Repository:
|
||||||
dir/data/<X // SEGMENTS_PER_DIR>/<X>
|
dir/data/<X // SEGMENTS_PER_DIR>/<X>
|
||||||
dir/index.X
|
dir/index.X
|
||||||
dir/hints.X
|
dir/hints.X
|
||||||
|
|
||||||
|
File system interaction
|
||||||
|
-----------------------
|
||||||
|
|
||||||
|
LoggedIO generally tries to rely on common behaviours across transactional file systems.
|
||||||
|
|
||||||
|
Segments that are deleted are truncated first, which avoids problems if the FS needs to
|
||||||
|
allocate space to delete the dirent of the segment. This mostly affects CoW file systems,
|
||||||
|
traditional journaling file systems have a fairly good grip on this problem.
|
||||||
|
|
||||||
|
Note that deletion, i.e. unlink(2), is atomic on every file system that uses inode reference
|
||||||
|
counts, which includes pretty much all of them. To remove a dirent the inodes refcount has
|
||||||
|
to be decreased, but you can't decrease the refcount before removing the dirent nor can you
|
||||||
|
decrease the refcount after removing the dirent. File systems solve this with a lock,
|
||||||
|
and by ensuring it all stays within the same FS transaction.
|
||||||
|
|
||||||
|
Truncation is generally not atomic in itself, and combining truncate(2) and unlink(2) is of
|
||||||
|
course never guaranteed to be atomic. Truncation in a classic extent-based FS is done in
|
||||||
|
roughly two phases, first the extents are removed then the inode is updated. (In practice
|
||||||
|
this is of course way more complex).
|
||||||
|
|
||||||
|
LoggedIO gracefully handles truncate/unlink splits as long as the truncate resulted in
|
||||||
|
a zero length file. Zero length segments are considered to not exist, while LoggedIO.cleanup()
|
||||||
|
will still get rid of them.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
class DoesNotExist(Error):
|
class DoesNotExist(Error):
|
||||||
|
@ -1111,6 +1135,8 @@ def segment_iterator(self, segment=None, reverse=False):
|
||||||
filenames = [filename for filename in filenames if filename.isdigit() and int(filename) <= segment]
|
filenames = [filename for filename in filenames if filename.isdigit() and int(filename) <= segment]
|
||||||
filenames = sorted(filenames, key=int, reverse=reverse)
|
filenames = sorted(filenames, key=int, reverse=reverse)
|
||||||
for filename in filenames:
|
for filename in filenames:
|
||||||
|
# Note: Do not filter out logically deleted segments (see "File system interaction" above),
|
||||||
|
# since this is used by cleanup and txn state detection as well.
|
||||||
yield int(filename), os.path.join(data_path, dir, filename)
|
yield int(filename), os.path.join(data_path, dir, filename)
|
||||||
|
|
||||||
def get_latest_segment(self):
|
def get_latest_segment(self):
|
||||||
|
@ -1132,7 +1158,7 @@ def cleanup(self, transaction_id):
|
||||||
self.segment = transaction_id + 1
|
self.segment = transaction_id + 1
|
||||||
for segment, filename in self.segment_iterator(reverse=True):
|
for segment, filename in self.segment_iterator(reverse=True):
|
||||||
if segment > transaction_id:
|
if segment > transaction_id:
|
||||||
os.unlink(filename)
|
truncate_and_unlink(filename)
|
||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
@ -1207,12 +1233,15 @@ def delete_segment(self, segment):
|
||||||
if segment in self.fds:
|
if segment in self.fds:
|
||||||
del self.fds[segment]
|
del self.fds[segment]
|
||||||
try:
|
try:
|
||||||
os.unlink(self.segment_filename(segment))
|
truncate_and_unlink(self.segment_filename(segment))
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def segment_exists(self, segment):
|
def segment_exists(self, segment):
|
||||||
return os.path.exists(self.segment_filename(segment))
|
filename = self.segment_filename(segment)
|
||||||
|
# When deleting segments, they are first truncated. If truncate(2) and unlink(2) are split
|
||||||
|
# across FS transactions, then logically deleted segments will show up as truncated.
|
||||||
|
return os.path.exists(filename) and os.path.getsize(filename)
|
||||||
|
|
||||||
def segment_size(self, segment):
|
def segment_size(self, segment):
|
||||||
return os.path.getsize(self.segment_filename(segment))
|
return os.path.getsize(self.segment_filename(segment))
|
||||||
|
@ -1258,6 +1287,7 @@ def recover_segment(self, segment, filename):
|
||||||
if segment in self.fds:
|
if segment in self.fds:
|
||||||
del self.fds[segment]
|
del self.fds[segment]
|
||||||
with open(filename, 'rb') as fd:
|
with open(filename, 'rb') as fd:
|
||||||
|
# XXX: Rather use mmap, this loads the entire segment (up to 500 MB by default) into memory.
|
||||||
data = memoryview(fd.read())
|
data = memoryview(fd.read())
|
||||||
os.rename(filename, filename + '.beforerecover')
|
os.rename(filename, filename + '.beforerecover')
|
||||||
logger.info('attempting to recover ' + filename)
|
logger.info('attempting to recover ' + filename)
|
||||||
|
|
Loading…
Reference in a new issue