1
0
Fork 0
mirror of https://github.com/borgbackup/borg.git synced 2025-01-01 12:45:34 +00:00

Merge pull request #7526 from ThomasWaldmann/continue-extract-master

extract --continue
This commit is contained in:
TW 2023-04-17 22:28:24 +02:00 committed by GitHub
commit cca8280393
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 87 additions and 14 deletions

View file

@ -783,14 +783,14 @@ def extract_helper(self, item, path, hlm, *, dry_run=False):
def extract_item(
self,
item,
*,
restore_attrs=True,
dry_run=False,
stdout=False,
sparse=False,
hlm=None,
stripped_components=0,
original_path=None,
pi=None,
continue_extraction=False,
):
"""
Extract archive item.
@ -801,10 +801,28 @@ def extract_item(
:param stdout: write extracted data to stdout
:param sparse: write sparse files (chunk-granularity, independent of the original being sparse)
:param hlm: maps hlid to link_target for extracting subtrees with hardlinks correctly
:param stripped_components: stripped leading path components to correct hard link extraction
:param original_path: 'path' key as stored in archive
:param pi: ProgressIndicatorPercent (or similar) for file extraction progress (in bytes)
:param continue_extraction: continue a previously interrupted extraction of same archive
"""
def same_item(item, st):
    """Is the archived item the same as the fs item at same path with stat st?

    :param item: archived item; its "mode", "size" and "mtime" values are compared
    :param st: stat result of the existing filesystem object (not following symlinks)
    :return: True only for a regular file whose mode, size and mtime (in ns) all match

    Metadata missing from the item counts as a mismatch (the file is then simply
    extracted again) rather than raising AttributeError.
    """
    if not stat.S_ISREG(st.st_mode):
        # we only "optimize" for regular files.
        # other file types are less frequent and have no content extraction we could "optimize away".
        return False
    # use .get() for all keys, consistent with the mtime lookup below:
    # an absent key yields None, which never equals the integer stat value.
    if item.get("mode") != st.st_mode or item.get("size") != st.st_size:
        # the size check catches incomplete previous file extraction
        return False
    if item.get("mtime") != st.st_mtime_ns:
        # note: mtime is "extracted" late, after xattrs and ACLs, but before flags.
        return False
    # this is good enough for the intended use case:
    # continuing an extraction of same archive that initially started in an empty directory.
    # there is a very small risk that "bsdflags" of one file are wrong:
    # if a previous extraction was interrupted between setting the mtime and setting non-default flags.
    return True
has_damaged_chunks = "chunks_healthy" in item
if dry_run or stdout:
with self.extract_helper(item, "", hlm, dry_run=dry_run or stdout) as hardlink_set:
@ -834,7 +852,6 @@ def extract_item(
raise BackupError("File has damaged (all-zero) chunks. Try running borg check --repair.")
return
original_path = original_path or item.path
dest = self.cwd
if item.path.startswith(("/", "../")):
raise Exception("Path should be relative and local")
@ -842,7 +859,9 @@ def extract_item(
# Attempt to remove existing files, ignore errors on failure
try:
st = os.stat(path, follow_symlinks=False)
if stat.S_ISDIR(st.st_mode):
if continue_extraction and same_item(item, st):
return # done! we already have fully extracted this file in a previous run.
elif stat.S_ISDIR(st.st_mode):
os.rmdir(path)
else:
os.unlink(path)
@ -998,6 +1017,16 @@ def restore_attrs(self, path, item, symlink=False, fd=None):
set_flags(path, item.bsdflags, fd=fd)
except OSError:
pass
else: # win32
# set timestamps rather late
mtime = item.mtime
atime = item.atime if "atime" in item else mtime
try:
# note: no fd support on win32
os.utime(path, None, ns=(atime, mtime))
except OSError:
# some systems don't support calling utime on a symlink
pass
def set_meta(self, key, value):
metadata = self._load_meta(self.id)

View file

@ -42,6 +42,7 @@ def do_extract(self, args, repository, manifest, archive):
stdout = args.stdout
sparse = args.sparse
strip_components = args.strip_components
continue_extraction = args.continue_extraction
dirs = []
hlm = HardLinkManager(id_type=bytes, info_type=str) # hlid -> path
@ -76,13 +77,7 @@ def do_extract(self, args, repository, manifest, archive):
archive.extract_item(item, stdout=stdout, restore_attrs=False)
else:
archive.extract_item(
item,
stdout=stdout,
sparse=sparse,
hlm=hlm,
stripped_components=strip_components,
original_path=orig_path,
pi=pi,
item, stdout=stdout, sparse=sparse, hlm=hlm, pi=pi, continue_extraction=continue_extraction
)
except (BackupOSError, BackupError) as e:
self.print_warning("%s: %s", remove_surrogates(orig_path), e)
@ -174,6 +169,12 @@ def build_parser_extract(self, subparsers, common_parser, mid_common_parser):
action="store_true",
help="create holes in output sparse file from all-zero chunks",
)
subparser.add_argument(
"--continue",
dest="continue_extraction",
action="store_true",
help="continue a previously interrupted extraction of same archive",
)
subparser.add_argument("name", metavar="NAME", type=archivename_validator, help="specify the archive name")
subparser.add_argument(
"paths", metavar="PATH", nargs="*", type=str, help="paths to extract; patterns are supported"

View file

@ -13,7 +13,7 @@
from ...helpers import flags_noatime, flags_normal
from .. import changedir, same_ts_ns
from .. import are_symlinks_supported, are_hardlinks_supported, is_utime_fully_supported, is_birthtime_fully_supported
from ..platform import is_darwin
from ..platform import is_darwin, is_win32
from . import (
ArchiverTestCaseBase,
ArchiverTestCaseBinaryBase,
@ -621,6 +621,49 @@ def patched_setxattr_EACCES(*args, **kwargs):
with patch.object(xattr, "setxattr", patched_setxattr_EACCES):
self.cmd(f"--repo={self.repository_location}", "extract", "test", exit_code=EXIT_WARNING)
def test_extract_continue(self):
    """Check that ``extract --continue`` skips complete files and redoes the rest.

    Three files are archived and fully extracted. Afterwards file2 is truncated
    (stands for a partially extracted file) and file3 is removed (stands for a
    not yet extracted file). A second extraction with ``--continue`` must leave
    file1 untouched and bring back file2 and file3 with correct mtime and contents.
    """
    repo_arg = f"--repo={self.repository_location}"
    CONTENTS1 = b"contents1" * 100
    CONTENTS2 = b"contents2" * 200
    CONTENTS3 = b"contents3" * 300

    self.cmd(repo_arg, "rcreate", RK_ENCRYPTION)
    self.create_regular_file("file1", contents=CONTENTS1)
    self.create_regular_file("file2", contents=CONTENTS2)
    self.create_regular_file("file3", contents=CONTENTS3)
    self.cmd(repo_arg, "create", "arch", "input")

    with changedir("output"):
        # first pass: a complete extraction that we then damage to mimic an interruption
        self.cmd(repo_arg, "extract", "arch")
        # file1 stays as it is: the "already successfully extracted" case
        st1_before = os.stat("input/file1")
        # file2: remember the good stat, then cut it short (wrong size, wrong mtime)
        st2_before = os.stat("input/file2")
        os.truncate("input/file2", 123)
        # file3: remember the good stat, then pretend it was never extracted
        st3_before = os.stat("input/file3")
        os.remove("input/file3")

    with changedir("output"):
        # second pass: same archive, same output dir, continuing this time
        self.cmd(repo_arg, "extract", "arch", "--continue")

        st1_after = os.stat("input/file1")
        assert st1_before.st_ino == st1_after.st_ino  # file1 was NOT extracted again
        assert st1_before.st_mtime_ns == st1_after.st_mtime_ns  # has correct mtime

        st2_after = os.stat("input/file2")
        assert st2_before.st_ino != st2_after.st_ino  # file2 was extracted again
        assert st2_before.st_mtime_ns == st2_after.st_mtime_ns  # has correct mtime

        st3_after = os.stat("input/file3")
        assert st3_before.st_ino != st3_after.st_ino  # file3 was extracted again
        assert st3_before.st_mtime_ns == st3_after.st_mtime_ns  # has correct mtime

        # windows has a strange ctime behaviour when deleting and recreating a file
        if not is_win32:
            assert st1_before.st_ctime_ns == st1_after.st_ctime_ns  # file not extracted again
            assert st2_before.st_ctime_ns != st2_after.st_ctime_ns  # file extracted again
            assert st3_before.st_ctime_ns != st3_after.st_ctime_ns  # file extracted again

        # finally compare the contents (which also verifies the file sizes)
        for extracted_path, wanted in (
            ("input/file1", CONTENTS1),
            ("input/file2", CONTENTS2),
            ("input/file3", CONTENTS3),
        ):
            with open(extracted_path, "rb") as f:
                assert f.read() == wanted
class RemoteArchiverTestCase(RemoteArchiverTestCaseBase, ArchiverTestCase):
"""run the same tests, but with a remote repository"""