From 95064cd241df15bdc7fac436ad9984a3bef0160a Mon Sep 17 00:00:00 2001
From: Marian Beermann <public@enkore.de>
Date: Thu, 25 May 2017 18:51:10 +0200
Subject: [PATCH] repository: truncate segments before unlinking

---
 src/borg/repository.py | 44 ++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 42 insertions(+), 2 deletions(-)
diff --git a/src/borg/repository.py b/src/borg/repository.py
index 597d3ca54..6ef75f8c0 100644
--- a/src/borg/repository.py
+++ b/src/borg/repository.py
@@ -83,6 +83,30 @@ class Repository:
     dir/data/<X // SEGMENTS_PER_DIR>/<X>
     dir/index.X
     dir/hints.X
+
+    File system interaction
+    -----------------------
+
+    LoggedIO generally tries to rely on common behaviours across transactional file systems.
+
+    Segments that are deleted are truncated first, which avoids problems if the FS needs to
+    allocate space to delete the dirent of the segment. This mostly affects CoW file systems,
+    traditional journaling file systems have a fairly good grip on this problem.
+
+    Note that deletion, i.e. unlink(2), is atomic on every file system that uses inode reference
+    counts, which includes pretty much all of them. To remove a dirent the inodes refcount has
+    to be decreased, but you can't decrease the refcount before removing the dirent nor can you
+    decrease the refcount after removing the dirent. File systems solve this with a lock,
+    and by ensuring it all stays within the same FS transaction.
+
+    Truncation is generally not atomic in itself, and combining truncate(2) and unlink(2) is of
+    course never guaranteed to be atomic. Truncation in a classic extent-based FS is done in
+    roughly two phases, first the extents are removed then the inode is updated. (In practice
+    this is of course way more complex).
+
+    LoggedIO gracefully handles truncate/unlink splits as long as the truncate resulted in
+    a zero length file. Zero length segments are considered to not exist, while LoggedIO.cleanup()
+    will still get rid of them.
     """
 
     class DoesNotExist(Error):
@@ -1111,6 +1135,8 @@ def segment_iterator(self, segment=None, reverse=False):
                 filenames = [filename for filename in filenames if filename.isdigit() and int(filename) <= segment]
             filenames = sorted(filenames, key=int, reverse=reverse)
             for filename in filenames:
+                # Note: Do not filter out logically deleted segments  (see "File system interaction" above),
+                # since this is used by cleanup and txn state detection as well.
                 yield int(filename), os.path.join(data_path, dir, filename)
 
     def get_latest_segment(self):
@@ -1132,6 +1158,9 @@ def cleanup(self, transaction_id):
         self.segment = transaction_id + 1
         for segment, filename in self.segment_iterator(reverse=True):
             if segment > transaction_id:
+                # Truncate segment files before unlink(). This can help a full file system recover.
+                # We can use 'wb', since the segment must exist (just returned by the segment_iterator).
+                open(filename, 'wb').close()
                 os.unlink(filename)
             else:
                 break
@@ -1207,12 +1236,22 @@ def delete_segment(self, segment):
         if segment in self.fds:
             del self.fds[segment]
         try:
-            os.unlink(self.segment_filename(segment))
+            filename = self.segment_filename(segment)
+            # Truncate segment files before unlink(). This can help a full file system recover.
+            # In this instance (cf. cleanup()) we need to use r+b (=O_RDWR|O_BINARY) and
+            # issue an explicit truncate() to avoid creating a file
+            # if *segment* did not exist in the first place.
+            with open(filename, 'r+b') as fd:
+                fd.truncate()
+            os.unlink(filename)
         except FileNotFoundError:
             pass
 
     def segment_exists(self, segment):
-        return os.path.exists(self.segment_filename(segment))
+        filename = self.segment_filename(segment)
+        # When deleting segments, they are first truncated. If truncate(2) and unlink(2) are split
+        # across FS transactions, then logically deleted segments will show up as truncated.
+        return os.path.exists(filename) and os.path.getsize(filename)
 
     def segment_size(self, segment):
         return os.path.getsize(self.segment_filename(segment))
@@ -1258,6 +1297,7 @@ def recover_segment(self, segment, filename):
         if segment in self.fds:
             del self.fds[segment]
         with open(filename, 'rb') as fd:
+            # XXX: Rather use mmap.
             data = memoryview(fd.read())
         os.rename(filename, filename + '.beforerecover')
         logger.info('attempting to recover ' + filename)