repository: truncate segments before unlinking

2025-02-21 21:57:36 +00:00 · 2017-05-25 18:51:10 +02:00 · 2017-05-25 18:51:10 +02:00 · 95064cd241
commit 95064cd241
parent 68b61acffd
1 changed files with 42 additions and 2 deletions
--- a/src/borg/repository.py
+++ b/src/borg/repository.py
@ -83,6 +83,30 @@ class Repository:
    dir/data/<X // SEGMENTS_PER_DIR>/<X>
    dir/index.X
    dir/hints.X
    File system interaction
    -----------------------
    LoggedIO generally tries to rely on common behaviours across transactional file systems.
    Segments that are deleted are truncated first, which avoids problems if the FS needs to
    allocate space to delete the dirent of the segment. This mostly affects CoW file systems,
    traditional journaling file systems have a fairly good grip on this problem.
    Note that deletion, i.e. unlink(2), is atomic on every file system that uses inode reference
    counts, which includes pretty much all of them. To remove a dirent the inodes refcount has
    to be decreased, but you can't decrease the refcount before removing the dirent nor can you
    decrease the refcount after removing the dirent. File systems solve this with a lock,
    and by ensuring it all stays within the same FS transaction.
    Truncation is generally not atomic in itself, and combining truncate(2) and unlink(2) is of
    course never guaranteed to be atomic. Truncation in a classic extent-based FS is done in
    roughly two phases, first the extents are removed then the inode is updated. (In practice
    this is of course way more complex).
    LoggedIO gracefully handles truncate/unlink splits as long as the truncate resulted in
    a zero length file. Zero length segments are considered to not exist, while LoggedIO.cleanup()
    will still get rid of them.
    """
    class DoesNotExist(Error):
@ -1111,6 +1135,8 @@ def segment_iterator(self, segment=None, reverse=False):
                filenames = [filename for filename in filenames if filename.isdigit() and int(filename) <= segment]
            filenames = sorted(filenames, key=int, reverse=reverse)
            for filename in filenames:
                # Note: Do not filter out logically deleted segments  (see "File system interaction" above),
                # since this is used by cleanup and txn state detection as well.
                yield int(filename), os.path.join(data_path, dir, filename)
    def get_latest_segment(self):
@ -1132,6 +1158,9 @@ def cleanup(self, transaction_id):
        self.segment = transaction_id + 1
        for segment, filename in self.segment_iterator(reverse=True):
            if segment > transaction_id:
                # Truncate segment files before unlink(). This can help a full file system recover.
                # We can use 'wb', since the segment must exist (just returned by the segment_iterator).
                open(filename, 'wb').close()
                os.unlink(filename)
            else:
                break
@ -1207,12 +1236,22 @@ def delete_segment(self, segment):
        if segment in self.fds:
            del self.fds[segment]
        try:
-            os.unlink(self.segment_filename(segment))
+            filename = self.segment_filename(segment)
            # Truncate segment files before unlink(). This can help a full file system recover.
            # In this instance (cf. cleanup()) we need to use r+b (=O_RDWR|O_BINARY) and
            # issue an explicit truncate() to avoid creating a file
            # if *segment* did not exist in the first place.
            with open(filename, 'r+b') as fd:
                fd.truncate()
            os.unlink(filename)
        except FileNotFoundError:
            pass
    def segment_exists(self, segment):
-        return os.path.exists(self.segment_filename(segment))
+        filename = self.segment_filename(segment)
        # When deleting segments, they are first truncated. If truncate(2) and unlink(2) are split
        # across FS transactions, then logically deleted segments will show up as truncated.
        return os.path.exists(filename) and os.path.getsize(filename)
    def segment_size(self, segment):
        return os.path.getsize(self.segment_filename(segment))
@ -1258,6 +1297,7 @@ def recover_segment(self, segment, filename):
        if segment in self.fds:
            del self.fds[segment]
        with open(filename, 'rb') as fd:
            # XXX: Rather use mmap.
            data = memoryview(fd.read())
        os.rename(filename, filename + '.beforerecover')
        logger.info('attempting to recover ' + filename)