From 9e534c1929c3ec5a80ff5149b92eab2bd0efa313 Mon Sep 17 00:00:00 2001
From: Thomas Waldmann <tw@waldmann-edv.de>
Date: Sun, 16 Apr 2023 13:42:33 +0200
Subject: [PATCH 1/3] Archive.extract_item: remove unused params, make most
 params kwargs

stripped_components: the stripping is already done in do_extract, which modifies item.path accordingly.

original_path: not used any more.

also: run black.
---
 src/borg/archive.py              | 16 +---------------
 src/borg/archiver/extract_cmd.py | 10 +---------
 2 files changed, 2 insertions(+), 24 deletions(-)

diff --git a/src/borg/archive.py b/src/borg/archive.py
index c181a2a53..fefb2bb46 100644
--- a/src/borg/archive.py
+++ b/src/borg/archive.py
@@ -780,18 +780,7 @@ def extract_helper(self, item, path, hlm, *, dry_run=False):
                 # In this case, we *want* to extract twice, because there is no other way.
                 pass
 
-    def extract_item(
-        self,
-        item,
-        restore_attrs=True,
-        dry_run=False,
-        stdout=False,
-        sparse=False,
-        hlm=None,
-        stripped_components=0,
-        original_path=None,
-        pi=None,
-    ):
+    def extract_item(self, item, *, restore_attrs=True, dry_run=False, stdout=False, sparse=False, hlm=None, pi=None):
         """
         Extract archive item.
 
@@ -801,8 +790,6 @@ def extract_item(
         :param stdout: write extracted data to stdout
         :param sparse: write sparse files (chunk-granularity, independent of the original being sparse)
         :param hlm: maps hlid to link_target for extracting subtrees with hardlinks correctly
-        :param stripped_components: stripped leading path components to correct hard link extraction
-        :param original_path: 'path' key as stored in archive
         :param pi: ProgressIndicatorPercent (or similar) for file extraction progress (in bytes)
         """
         has_damaged_chunks = "chunks_healthy" in item
@@ -834,7 +821,6 @@ def extract_item(
                 raise BackupError("File has damaged (all-zero) chunks. Try running borg check --repair.")
             return
 
-        original_path = original_path or item.path
         dest = self.cwd
         if item.path.startswith(("/", "../")):
             raise Exception("Path should be relative and local")
diff --git a/src/borg/archiver/extract_cmd.py b/src/borg/archiver/extract_cmd.py
index a3fed094e..af64b1ad9 100644
--- a/src/borg/archiver/extract_cmd.py
+++ b/src/borg/archiver/extract_cmd.py
@@ -75,15 +75,7 @@ def do_extract(self, args, repository, manifest, archive):
                         dirs.append(item)
                         archive.extract_item(item, stdout=stdout, restore_attrs=False)
                     else:
-                        archive.extract_item(
-                            item,
-                            stdout=stdout,
-                            sparse=sparse,
-                            hlm=hlm,
-                            stripped_components=strip_components,
-                            original_path=orig_path,
-                            pi=pi,
-                        )
+                        archive.extract_item(item, stdout=stdout, sparse=sparse, hlm=hlm, pi=pi)
             except (BackupOSError, BackupError) as e:
                 self.print_warning("%s: %s", remove_surrogates(orig_path), e)
 

From 7786cc7cb4e380bec58581c39128d8c9a3c96ff4 Mon Sep 17 00:00:00 2001
From: Thomas Waldmann <tw@waldmann-edv.de>
Date: Sun, 16 Apr 2023 18:46:40 +0200
Subject: [PATCH 2/3] extract: support extraction of atime/mtime on win32

---
 src/borg/archive.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/borg/archive.py b/src/borg/archive.py
index fefb2bb46..bd3895c0e 100644
--- a/src/borg/archive.py
+++ b/src/borg/archive.py
@@ -984,6 +984,16 @@ def restore_attrs(self, path, item, symlink=False, fd=None):
                     set_flags(path, item.bsdflags, fd=fd)
                 except OSError:
                     pass
+        else:  # win32
+            # set timestamps rather late
+            mtime = item.mtime
+            atime = item.atime if "atime" in item else mtime
+            try:
+                # note: no fd support on win32
+                os.utime(path, None, ns=(atime, mtime))
+            except OSError:
+                # some systems don't support calling utime on a symlink
+                pass
 
     def set_meta(self, key, value):
         metadata = self._load_meta(self.id)

From 573275e67850bc9cb8383c9e7ff7bc81c72f7cfb Mon Sep 17 00:00:00 2001
From: Thomas Waldmann <tw@waldmann-edv.de>
Date: Sun, 16 Apr 2023 15:34:40 +0200
Subject: [PATCH 3/3] extract --continue: continue a previously interrupted
 extraction, fixes #1356

This skips all regular files that were already fully extracted,
but deletes and fully re-extracts incomplete files.
---
 src/borg/archive.py                        | 37 +++++++++++++++++-
 src/borg/archiver/extract_cmd.py           | 11 +++++-
 src/borg/testsuite/archiver/extract_cmd.py | 45 +++++++++++++++++++++-
 3 files changed, 89 insertions(+), 4 deletions(-)

diff --git a/src/borg/archive.py b/src/borg/archive.py
index bd3895c0e..3caf50608 100644
--- a/src/borg/archive.py
+++ b/src/borg/archive.py
@@ -780,7 +780,18 @@ def extract_helper(self, item, path, hlm, *, dry_run=False):
                 # In this case, we *want* to extract twice, because there is no other way.
                 pass
 
-    def extract_item(self, item, *, restore_attrs=True, dry_run=False, stdout=False, sparse=False, hlm=None, pi=None):
+    def extract_item(
+        self,
+        item,
+        *,
+        restore_attrs=True,
+        dry_run=False,
+        stdout=False,
+        sparse=False,
+        hlm=None,
+        pi=None,
+        continue_extraction=False,
+    ):
         """
         Extract archive item.
 
@@ -791,7 +802,27 @@ def extract_item(self, item, *, restore_attrs=True, dry_run=False, stdout=False,
         :param sparse: write sparse files (chunk-granularity, independent of the original being sparse)
         :param hlm: maps hlid to link_target for extracting subtrees with hardlinks correctly
         :param pi: ProgressIndicatorPercent (or similar) for file extraction progress (in bytes)
+        :param continue_extraction: continue a previously interrupted extraction of same archive
         """
+
+        def same_item(item, st):
+            """is the archived item the same as the fs item at same path with stat st?"""
+            if not stat.S_ISREG(st.st_mode):
+                # we only "optimize" for regular files.
+                # other file types are less frequent and have no content extraction we could "optimize away".
+                return False
+            if item.mode != st.st_mode or item.size != st.st_size:
+                # the size check catches incomplete previous file extraction
+                return False
+            if item.get("mtime") != st.st_mtime_ns:
+                # note: mtime is "extracted" late, after xattrs and ACLs, but before flags.
+                return False
+            # this is good enough for the intended use case:
+            # continuing an extraction of same archive that initially started in an empty directory.
+            # there is a very small risk that "bsdflags" of one file are wrong:
+            # if a previous extraction was interrupted between setting the mtime and setting non-default flags.
+            return True
+
         has_damaged_chunks = "chunks_healthy" in item
         if dry_run or stdout:
             with self.extract_helper(item, "", hlm, dry_run=dry_run or stdout) as hardlink_set:
@@ -828,7 +859,9 @@ def extract_item(self, item, *, restore_attrs=True, dry_run=False, stdout=False,
         # Attempt to remove existing files, ignore errors on failure
         try:
             st = os.stat(path, follow_symlinks=False)
-            if stat.S_ISDIR(st.st_mode):
+            if continue_extraction and same_item(item, st):
+                return  # done! we already have fully extracted this file in a previous run.
+            elif stat.S_ISDIR(st.st_mode):
                 os.rmdir(path)
             else:
                 os.unlink(path)
diff --git a/src/borg/archiver/extract_cmd.py b/src/borg/archiver/extract_cmd.py
index af64b1ad9..452b9a9a5 100644
--- a/src/borg/archiver/extract_cmd.py
+++ b/src/borg/archiver/extract_cmd.py
@@ -42,6 +42,7 @@ def do_extract(self, args, repository, manifest, archive):
         stdout = args.stdout
         sparse = args.sparse
         strip_components = args.strip_components
+        continue_extraction = args.continue_extraction
         dirs = []
         hlm = HardLinkManager(id_type=bytes, info_type=str)  # hlid -> path
 
@@ -75,7 +76,9 @@ def do_extract(self, args, repository, manifest, archive):
                         dirs.append(item)
                         archive.extract_item(item, stdout=stdout, restore_attrs=False)
                     else:
-                        archive.extract_item(item, stdout=stdout, sparse=sparse, hlm=hlm, pi=pi)
+                        archive.extract_item(
+                            item, stdout=stdout, sparse=sparse, hlm=hlm, pi=pi, continue_extraction=continue_extraction
+                        )
             except (BackupOSError, BackupError) as e:
                 self.print_warning("%s: %s", remove_surrogates(orig_path), e)
 
@@ -166,6 +169,12 @@ def build_parser_extract(self, subparsers, common_parser, mid_common_parser):
             action="store_true",
             help="create holes in output sparse file from all-zero chunks",
         )
+        subparser.add_argument(
+            "--continue",
+            dest="continue_extraction",
+            action="store_true",
+            help="continue a previously interrupted extraction of same archive",
+        )
         subparser.add_argument("name", metavar="NAME", type=archivename_validator, help="specify the archive name")
         subparser.add_argument(
             "paths", metavar="PATH", nargs="*", type=str, help="paths to extract; patterns are supported"
diff --git a/src/borg/testsuite/archiver/extract_cmd.py b/src/borg/testsuite/archiver/extract_cmd.py
index f3266d011..db1b3e4d2 100644
--- a/src/borg/testsuite/archiver/extract_cmd.py
+++ b/src/borg/testsuite/archiver/extract_cmd.py
@@ -13,7 +13,7 @@
 from ...helpers import flags_noatime, flags_normal
 from .. import changedir, same_ts_ns
 from .. import are_symlinks_supported, are_hardlinks_supported, is_utime_fully_supported, is_birthtime_fully_supported
-from ..platform import is_darwin
+from ..platform import is_darwin, is_win32
 from . import (
     ArchiverTestCaseBase,
     ArchiverTestCaseBinaryBase,
@@ -621,6 +621,49 @@ def patched_setxattr_EACCES(*args, **kwargs):
             with patch.object(xattr, "setxattr", patched_setxattr_EACCES):
                 self.cmd(f"--repo={self.repository_location}", "extract", "test", exit_code=EXIT_WARNING)
 
+    def test_extract_continue(self):
+        CONTENTS1, CONTENTS2, CONTENTS3 = b"contents1" * 100, b"contents2" * 200, b"contents3" * 300
+        self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION)
+        self.create_regular_file("file1", contents=CONTENTS1)
+        self.create_regular_file("file2", contents=CONTENTS2)
+        self.create_regular_file("file3", contents=CONTENTS3)
+        self.cmd(f"--repo={self.repository_location}", "create", "arch", "input")
+        with changedir("output"):
+            # we simulate an interrupted/partial extraction:
+            self.cmd(f"--repo={self.repository_location}", "extract", "arch")
+            # do not modify file1, it stands for a successfully extracted file
+            file1_st = os.stat("input/file1")
+            # simulate a partially extracted file2 (smaller size, archived mtime not yet set)
+            file2_st = os.stat("input/file2")
+            os.truncate("input/file2", 123)  # -> incorrect size, incorrect mtime
+            # simulate file3 has not yet been extracted
+            file3_st = os.stat("input/file3")
+            os.remove("input/file3")
+        with changedir("output"):
+            # now try to continue extracting, using the same archive, same output dir:
+            self.cmd(f"--repo={self.repository_location}", "extract", "arch", "--continue")
+            now_file1_st = os.stat("input/file1")
+            assert file1_st.st_ino == now_file1_st.st_ino  # file1 was NOT extracted again
+            assert file1_st.st_mtime_ns == now_file1_st.st_mtime_ns  # has correct mtime
+            new_file2_st = os.stat("input/file2")
+            assert file2_st.st_ino != new_file2_st.st_ino  # file2 was extracted again
+            assert file2_st.st_mtime_ns == new_file2_st.st_mtime_ns  # has correct mtime
+            new_file3_st = os.stat("input/file3")
+            assert file3_st.st_ino != new_file3_st.st_ino  # file3 was extracted again
+            assert file3_st.st_mtime_ns == new_file3_st.st_mtime_ns  # has correct mtime
+            # windows has a strange ctime behaviour when deleting and recreating a file
+            if not is_win32:
+                assert file1_st.st_ctime_ns == now_file1_st.st_ctime_ns  # file not extracted again
+                assert file2_st.st_ctime_ns != new_file2_st.st_ctime_ns  # file extracted again
+                assert file3_st.st_ctime_ns != new_file3_st.st_ctime_ns  # file extracted again
+            # check if all contents (and thus also file sizes) are correct:
+            with open("input/file1", "rb") as f:
+                assert f.read() == CONTENTS1
+            with open("input/file2", "rb") as f:
+                assert f.read() == CONTENTS2
+            with open("input/file3", "rb") as f:
+                assert f.read() == CONTENTS3
+
 
 class RemoteArchiverTestCase(RemoteArchiverTestCaseBase, ArchiverTestCase):
     """run the same tests, but with a remote repository"""