borg mount: support hardlinks correctly, add tests

The previous commit did not yet support hardlinks correctly if the
hardlink master was excluded from the mount (for example by an exclude
pattern or a path selection).

Added some tests for this and slightly refactored the related tests.
This commit is contained in:
Thomas Waldmann 2017-11-21 01:20:57 +01:00
parent 77df1cfe8c
commit e97deafb16
2 changed files with 94 additions and 37 deletions

View File

@ -297,8 +297,15 @@ class FuseBackend(object):
consider_part_files=self._args.consider_part_files)
strip_components = self._args.strip_components
matcher = Archiver.build_matcher(self._args.patterns, self._args.paths)
dummy = lambda x, y: None # TODO: add hardlink_master support code, see Archiver
filter = Archiver.build_filter(matcher, dummy, strip_components)
partial_extract = not matcher.empty() or strip_components
hardlink_masters = {} if partial_extract else None
def peek_and_store_hardlink_masters(item, matched):
if (partial_extract and not matched and hardlinkable(item.mode) and
item.get('hardlink_master', True) and 'source' not in item):
hardlink_masters[item.get('path')] = (item.get('chunks'), None)
filter = Archiver.build_filter(matcher, peek_and_store_hardlink_masters, strip_components)
for item_inode, item in self.cache.iter_archive_items(archive.metadata.items, filter=filter):
if strip_components:
item.path = os.sep.join(item.path.split(os.sep)[strip_components:])
@ -319,11 +326,16 @@ class FuseBackend(object):
parent = 1
for segment in segments[:-1]:
parent = self._process_inner(segment, parent)
self._process_leaf(segments[-1], item, parent, prefix, is_dir, item_inode)
self._process_leaf(segments[-1], item, parent, prefix, is_dir, item_inode,
hardlink_masters, strip_components)
duration = time.perf_counter() - t0
logger.debug('fuse: _process_archive completed in %.1f s for archive %s', duration, archive.name)
def _process_leaf(self, name, item, parent, prefix, is_dir, item_inode):
def _process_leaf(self, name, item, parent, prefix, is_dir, item_inode, hardlink_masters, stripped_components):
path = item.path
del item.path # save some space
hardlink_masters = hardlink_masters or {}
def file_version(item, path):
if 'chunks' in item:
file_id = blake2b_128(path)
@ -348,35 +360,44 @@ class FuseBackend(object):
version_enc = os.fsencode('.%05d' % version)
return name + version_enc + ext
if 'source' in item and hardlinkable(item.mode):
source = os.path.join(*item.source.split(os.sep)[stripped_components:])
chunks, link_target = hardlink_masters.get(item.source, (None, source))
if link_target:
# Hard link was extracted previously, just link
link_target = os.fsencode(link_target)
if self.versions:
# adjust link target name with version
version = self.file_versions[link_target]
link_target = make_versioned_name(link_target, version, add_dir=True)
try:
inode = self.find_inode(link_target, prefix)
except KeyError:
logger.warning('Skipping broken hard link: %s -> %s', path, source)
return
item = self.get_item(inode)
item.nlink = item.get('nlink', 1) + 1
self._items[inode] = item
elif chunks is not None:
# assign chunks to this item, since the item which had the chunks was not extracted
item.chunks = chunks
inode = item_inode
self._items[inode] = item
if hardlink_masters:
# Update master entry with extracted item path, so that following hardlinks don't extract twice.
hardlink_masters[item.source] = (None, path)
else:
inode = item_inode
if self.versions and not is_dir:
parent = self._process_inner(name, parent)
path = os.fsencode(item.path)
version = file_version(item, path)
enc_path = os.fsencode(path)
version = file_version(item, enc_path)
if version is not None:
# regular file, with contents - maybe a hardlink master
name = make_versioned_name(name, version)
self.file_versions[path] = version
self.file_versions[enc_path] = version
path = item.path
del item.path # save some space
if 'source' in item and hardlinkable(item.mode):
# a hardlink, no contents, <source> is the hardlink master
source = os.fsencode(item.source)
if self.versions:
# adjust source name with version
version = self.file_versions[source]
source = make_versioned_name(source, version, add_dir=True)
name = make_versioned_name(name, version)
try:
inode = self.find_inode(source, prefix)
except KeyError:
logger.warning('Skipping broken hard link: %s -> %s', path, item.source)
return
item = self.cache.get(inode)
item.nlink = item.get('nlink', 1) + 1
self._items[inode] = item
else:
inode = item_inode
self.parent[inode] = parent
if name:
self.contents[parent][name] = inode

View File

@ -760,7 +760,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
os.mkdir(os.path.join(self.input_path, 'dir1'))
os.mkdir(os.path.join(self.input_path, 'dir1/subdir'))
self.create_regular_file('source')
self.create_regular_file('source', contents=b'123456')
os.link(os.path.join(self.input_path, 'source'),
os.path.join(self.input_path, 'abba'))
os.link(os.path.join(self.input_path, 'source'),
@ -778,30 +778,56 @@ class ArchiverTestCase(ArchiverTestCaseBase):
requires_hardlinks = pytest.mark.skipif(not are_hardlinks_supported(), reason='hardlinks not supported')
@requires_hardlinks
def test_strip_components_links(self):
@unittest.skipUnless(has_llfuse, 'llfuse not installed')
def test_mount_hardlinks(self):
self._extract_hardlinks_setup()
with changedir('output'):
self.cmd('extract', self.repository_location + '::test', '--strip-components', '2')
mountpoint = os.path.join(self.tmpdir, 'mountpoint')
with self.fuse_mount(self.repository_location + '::test', mountpoint, '--strip-components=2'), \
changedir(mountpoint):
assert os.stat('hardlink').st_nlink == 2
assert os.stat('subdir/hardlink').st_nlink == 2
assert open('subdir/hardlink', 'rb').read() == b'123456'
assert os.stat('aaaa').st_nlink == 2
assert os.stat('source2').st_nlink == 2
with changedir('output'):
self.cmd('extract', self.repository_location + '::test')
with self.fuse_mount(self.repository_location + '::test', mountpoint, 'input/dir1'), \
changedir(mountpoint):
assert os.stat('input/dir1/hardlink').st_nlink == 2
assert os.stat('input/dir1/subdir/hardlink').st_nlink == 2
assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456'
assert os.stat('input/dir1/aaaa').st_nlink == 2
assert os.stat('input/dir1/source2').st_nlink == 2
with self.fuse_mount(self.repository_location + '::test', mountpoint), \
changedir(mountpoint):
assert os.stat('input/source').st_nlink == 4
assert os.stat('input/abba').st_nlink == 4
assert os.stat('input/dir1/hardlink').st_nlink == 4
assert os.stat('input/dir1/subdir/hardlink').st_nlink == 4
assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456'
@requires_hardlinks
def test_extract_hardlinks(self):
# Checks hard link counts (st_nlink) and file content after running `extract`
# three ways: with --strip-components 2, restricted to the path input/dir1,
# and with no restriction at all.
# NOTE(review): indentation is not visible in this view, so the exact extent of
# each `with changedir('output')` block cannot be confirmed from here.
# NOTE(review): the expected content b'123456' presumably comes from the file
# created by _extract_hardlinks_setup() - confirm against that helper.
self._extract_hardlinks_setup()
# Case 1: extract with '--strip-components', '2'; the checked paths have no
# leading input/dir1 prefix and every checked link count is expected to be 2.
with changedir('output'):
self.cmd('extract', self.repository_location + '::test', '--strip-components', '2')
assert os.stat('hardlink').st_nlink == 2
assert os.stat('subdir/hardlink').st_nlink == 2
assert open('subdir/hardlink', 'rb').read() == b'123456'
assert os.stat('aaaa').st_nlink == 2
assert os.stat('source2').st_nlink == 2
# Case 2: extract only 'input/dir1'; full paths are kept and the same link
# counts (2) and content are expected.
with changedir('output'):
self.cmd('extract', self.repository_location + '::test', 'input/dir1')
assert os.stat('input/dir1/hardlink').st_nlink == 2
assert os.stat('input/dir1/subdir/hardlink').st_nlink == 2
assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456'
assert os.stat('input/dir1/aaaa').st_nlink == 2
assert os.stat('input/dir1/source2').st_nlink == 2
# Case 3: extract the whole archive; the four checked names are expected to
# share a link count of 4.
with changedir('output'):
self.cmd('extract', self.repository_location + '::test')
assert os.stat('input/source').st_nlink == 4
assert os.stat('input/abba').st_nlink == 4
assert os.stat('input/dir1/hardlink').st_nlink == 4
assert os.stat('input/dir1/subdir/hardlink').st_nlink == 4
assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456'
def test_extract_include_exclude(self):
self.cmd('init', '--encryption=repokey', self.repository_location)
@ -2182,8 +2208,9 @@ class ArchiverTestCase(ArchiverTestCaseBase):
self.cmd('init', '--encryption=repokey', self.repository_location)
self.create_regular_file('test', contents=b'first')
if are_hardlinks_supported():
self.create_regular_file('hardlink1', contents=b'')
self.create_regular_file('hardlink1', contents=b'123456')
os.link('input/hardlink1', 'input/hardlink2')
os.link('input/hardlink1', 'input/hardlink3')
self.cmd('create', self.repository_location + '::archive1', 'input')
self.create_regular_file('test', contents=b'second')
self.cmd('create', self.repository_location + '::archive2', 'input')
@ -2195,9 +2222,18 @@ class ArchiverTestCase(ArchiverTestCaseBase):
assert all(f.startswith('test.') for f in files) # ... with files test.xxxxx in there
assert {b'first', b'second'} == {open(os.path.join(path, f), 'rb').read() for f in files}
if are_hardlinks_supported():
st1 = os.stat(os.path.join(mountpoint, 'input', 'hardlink1', 'hardlink1.00001'))
st2 = os.stat(os.path.join(mountpoint, 'input', 'hardlink2', 'hardlink2.00001'))
assert st1.st_ino == st2.st_ino
hl1 = os.path.join(mountpoint, 'input', 'hardlink1', 'hardlink1.00001')
hl2 = os.path.join(mountpoint, 'input', 'hardlink2', 'hardlink2.00001')
hl3 = os.path.join(mountpoint, 'input', 'hardlink3', 'hardlink3.00001')
assert os.stat(hl1).st_ino == os.stat(hl2).st_ino == os.stat(hl3).st_ino
assert open(hl3, 'rb').read() == b'123456'
# similar again, but exclude the hardlink master:
with self.fuse_mount(self.repository_location, mountpoint, '-o', 'versions', '-e', 'input/hardlink1'):
if are_hardlinks_supported():
hl2 = os.path.join(mountpoint, 'input', 'hardlink2', 'hardlink2.00001')
hl3 = os.path.join(mountpoint, 'input', 'hardlink3', 'hardlink3.00001')
assert os.stat(hl2).st_ino == os.stat(hl3).st_ino
assert open(hl3, 'rb').read() == b'123456'
@unittest.skipUnless(has_llfuse, 'llfuse not installed')
def test_fuse_allow_damaged_files(self):