support platforms with no os.link (#4903)

support platforms with no os.link, fixes #4901

If we don't have os.link, we just extract another copy instead of making a hardlink.

for that to work, we need to have (and keep) the chunks list in hardlink_masters.
This commit is contained in:
TW 2020-01-17 20:56:23 +01:00 committed by Will
parent 854065e193
commit 597b09a993
6 changed files with 43 additions and 12 deletions

View File

@ -48,6 +48,7 @@ from .remote import cache_if_remote
from .repository import Repository, LIST_SCAN_LIMIT
has_lchmod = hasattr(os, 'lchmod')
has_link = hasattr(os, 'link')
class Statistics:
@ -635,7 +636,7 @@ Utilization of max. archive size: {csize_max:.0%}
if 'source' in item:
source = os.path.join(dest, *item.source.split(os.sep)[stripped_components:])
chunks, link_target = hardlink_masters.get(item.source, (None, source))
if link_target:
if link_target and has_link:
# Hard link was extracted previously, just link
with backup_io('link'):
os.link(link_target, path)
@ -645,8 +646,14 @@ Utilization of max. archive size: {csize_max:.0%}
item.chunks = chunks
yield hardlink_set
if not hardlink_set and hardlink_masters:
# Update master entry with extracted item path, so that following hardlinks don't extract twice.
hardlink_masters[item.get('source') or original_path] = (None, path)
if has_link:
# Update master entry with extracted item path, so that following hardlinks don't extract twice.
# We have hardlinking support, so we will hardlink, not extract.
hardlink_masters[item.get('source') or original_path] = (None, path)
else:
# Broken platform with no hardlinking support.
# In this case, we *want* to extract twice, because there is no other way.
pass
def extract_item(self, item, restore_attrs=True, dry_run=False, stdout=False, sparse=False,
hardlink_masters=None, stripped_components=0, original_path=None, pi=None):

View File

@ -39,6 +39,7 @@ try:
from .archive import Archive, ArchiveChecker, ArchiveRecreater, Statistics, is_special
from .archive import BackupError, BackupOSError, backup_io, OsOpen, stat_update_check
from .archive import FilesystemObjectProcessors, MetadataCollector, ChunksProcessor
from .archive import has_link
from .cache import Cache, assert_secure, SecurityManager
from .constants import * # NOQA
from .compress import CompressionSpec
@ -768,11 +769,20 @@ class Archiver:
strip_components = args.strip_components
dirs = []
partial_extract = not matcher.empty() or strip_components
hardlink_masters = {} if partial_extract else None
hardlink_masters = {} if partial_extract or not has_link else None
def peek_and_store_hardlink_masters(item, matched):
if (partial_extract and not matched and hardlinkable(item.mode) and
item.get('hardlink_master', True) and 'source' not in item):
# not has_link:
# OS does not have hardlink capability, thus we need to remember the chunks so that
# we can extract all hardlinks as separate normal (not-hardlinked) files instead.
#
# partial_extract and not matched and hardlinkable:
# we do not extract the very first hardlink, so we need to remember the chunks
# in hardlink_masters, so we can use them when we extract some 2nd+ hardlink item
# that has no chunks list.
if ((not has_link or (partial_extract and not matched and hardlinkable(item.mode)))
and
(item.get('hardlink_master', True) and 'source' not in item)):
hardlink_masters[item.get('path')] = (item.get('chunks'), None)
filter = self.build_filter(matcher, peek_and_store_hardlink_masters, strip_components)

View File

@ -295,14 +295,18 @@ class Repository:
secure_erase(old_config_path)
if os.path.isfile(config_path):
link_error_msg = ("Failed to securely erase old repository config file (hardlinks not supported>). "
"Old repokey data, if any, might persist on physical storage.")
try:
os.link(config_path, old_config_path)
except OSError as e:
if e.errno in (errno.EMLINK, errno.ENOSYS, errno.EPERM, errno.EACCES, errno.ENOTSUP):
logger.warning("Failed to securely erase old repository config file (hardlinks not supported>). "
"Old repokey data, if any, might persist on physical storage.")
logger.warning(link_error_msg)
else:
raise
except AttributeError:
# some python ports have no os.link, see #4901
logger.warning(link_error_msg)
with SaveFile(config_path) as fd:
config.write(fd)

View File

@ -79,6 +79,10 @@ def are_symlinks_supported():
@functools.lru_cache()
def are_hardlinks_supported():
if not hasattr(os, 'link'):
# some pythons do not have os.link
return False
with unopened_tempfile() as file1path, unopened_tempfile() as file2path:
open(file1path, 'w').close()
try:

View File

@ -1025,7 +1025,11 @@ class ArchiverTestCase(ArchiverTestCaseBase):
self.create_regular_file('cache2/%s' % CACHE_TAG_NAME,
contents=b'invalid signature')
os.mkdir('input/cache3')
os.link('input/cache1/%s' % CACHE_TAG_NAME, 'input/cache3/%s' % CACHE_TAG_NAME)
if are_hardlinks_supported():
os.link('input/cache1/%s' % CACHE_TAG_NAME, 'input/cache3/%s' % CACHE_TAG_NAME)
else:
self.create_regular_file('cache3/%s' % CACHE_TAG_NAME,
contents=CACHE_TAG_CONTENTS + b' extra stuff')
def test_create_stdin(self):
self.cmd('init', '--encryption=repokey', self.repository_location)
@ -2205,8 +2209,9 @@ class ArchiverTestCase(ArchiverTestCaseBase):
assert sti1.st_atime == sto1.st_atime
assert sti1.st_ctime == sto1.st_ctime
assert sti1.st_mtime == sto1.st_mtime
# note: there is another hardlink to this, see below
assert sti1.st_nlink == sto1.st_nlink == 2
if are_hardlinks_supported():
# note: there is another hardlink to this, see below
assert sti1.st_nlink == sto1.st_nlink == 2
# read
with open(in_fn, 'rb') as in_f, open(out_fn, 'rb') as out_f:
assert in_f.read() == out_f.read()

View File

@ -141,6 +141,7 @@ def test_keys(attic_repo, attic_key_file):
assert key_valid(keyfile_path)
@pytest.mark.skipif(not are_hardlinks_supported(), reason='hardlinks not supported')
def test_convert_all(attic_repo, attic_key_file, inplace):
"""test all conversion steps
@ -166,7 +167,7 @@ def test_convert_all(attic_repo, attic_key_file, inplace):
with AtticRepositoryUpgrader(repo_path, create=False) as repository:
# replicate command dispatch, partly
os.umask(UMASK_DEFAULT)
backup = repository.upgrade(dryrun=False, inplace=inplace)
backup = repository.upgrade(dryrun=False, inplace=inplace) # note: uses hardlinks internally
if inplace:
assert backup is None
assert first_inode(repository.path) == orig_inode