diff --git a/src/borg/fuse.py b/src/borg/fuse.py index 3558b0dc4..9ff8a4d1f 100644 --- a/src/borg/fuse.py +++ b/src/borg/fuse.py @@ -297,8 +297,15 @@ class FuseBackend(object): consider_part_files=self._args.consider_part_files) strip_components = self._args.strip_components matcher = Archiver.build_matcher(self._args.patterns, self._args.paths) - dummy = lambda x, y: None # TODO: add hardlink_master support code, see Archiver - filter = Archiver.build_filter(matcher, dummy, strip_components) + partial_extract = not matcher.empty() or strip_components + hardlink_masters = {} if partial_extract else None + + def peek_and_store_hardlink_masters(item, matched): + if (partial_extract and not matched and hardlinkable(item.mode) and + item.get('hardlink_master', True) and 'source' not in item): + hardlink_masters[item.get('path')] = (item.get('chunks'), None) + + filter = Archiver.build_filter(matcher, peek_and_store_hardlink_masters, strip_components) for item_inode, item in self.cache.iter_archive_items(archive.metadata.items, filter=filter): if strip_components: item.path = os.sep.join(item.path.split(os.sep)[strip_components:]) @@ -319,11 +326,16 @@ class FuseBackend(object): parent = 1 for segment in segments[:-1]: parent = self._process_inner(segment, parent) - self._process_leaf(segments[-1], item, parent, prefix, is_dir, item_inode) + self._process_leaf(segments[-1], item, parent, prefix, is_dir, item_inode, + hardlink_masters, strip_components) duration = time.perf_counter() - t0 logger.debug('fuse: _process_archive completed in %.1f s for archive %s', duration, archive.name) - def _process_leaf(self, name, item, parent, prefix, is_dir, item_inode): + def _process_leaf(self, name, item, parent, prefix, is_dir, item_inode, hardlink_masters, stripped_components): + path = item.path + del item.path # save some space + hardlink_masters = hardlink_masters or {} + def file_version(item, path): if 'chunks' in item: file_id = blake2b_128(path) @@ -348,35 +360,44 @@ class FuseBackend(object): version_enc = os.fsencode('.%05d' % version) return name + version_enc + ext + if 'source' in item and hardlinkable(item.mode): + source = os.path.join(*item.source.split(os.sep)[stripped_components:]) + chunks, link_target = hardlink_masters.get(item.source, (None, source)) + if link_target: + # Hard link was extracted previously, just link + link_target = os.fsencode(link_target) + if self.versions: + # adjust link target name with version + version = self.file_versions[link_target] + link_target = make_versioned_name(link_target, version, add_dir=True) + try: + inode = self.find_inode(link_target, prefix) + except KeyError: + logger.warning('Skipping broken hard link: %s -> %s', path, source) + return + item = self.get_item(inode) + item.nlink = item.get('nlink', 1) + 1 + self._items[inode] = item + elif chunks is not None: + # assign chunks to this item, since the item which had the chunks was not extracted + item.chunks = chunks + inode = item_inode + self._items[inode] = item + if hardlink_masters: + # Update master entry with extracted item path, so that following hardlinks don't extract twice. + hardlink_masters[item.source] = (None, path) + else: + inode = item_inode + if self.versions and not is_dir: parent = self._process_inner(name, parent) - path = os.fsencode(item.path) - version = file_version(item, path) + enc_path = os.fsencode(path) + version = file_version(item, enc_path) if version is not None: # regular file, with contents - maybe a hardlink master name = make_versioned_name(name, version) - self.file_versions[path] = version + self.file_versions[enc_path] = version - path = item.path - del item.path # save some space - if 'source' in item and hardlinkable(item.mode): - # a hardlink, no contents, is the hardlink master - source = os.fsencode(item.source) - if self.versions: - # adjust source name with version - version = self.file_versions[source] - source = make_versioned_name(source, version, add_dir=True) - name = make_versioned_name(name, version) - try: - inode = self.find_inode(source, prefix) - except KeyError: - logger.warning('Skipping broken hard link: %s -> %s', path, item.source) - return - item = self.cache.get(inode) - item.nlink = item.get('nlink', 1) + 1 - self._items[inode] = item - else: - inode = item_inode self.parent[inode] = parent if name: self.contents[parent][name] = inode diff --git a/src/borg/testsuite/archiver.py b/src/borg/testsuite/archiver.py index 227b2d336..51651fd80 100644 --- a/src/borg/testsuite/archiver.py +++ b/src/borg/testsuite/archiver.py @@ -760,7 +760,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): os.mkdir(os.path.join(self.input_path, 'dir1')) os.mkdir(os.path.join(self.input_path, 'dir1/subdir')) - self.create_regular_file('source') + self.create_regular_file('source', contents=b'123456') os.link(os.path.join(self.input_path, 'source'), os.path.join(self.input_path, 'abba')) os.link(os.path.join(self.input_path, 'source'), @@ -778,30 +778,56 @@ class ArchiverTestCase(ArchiverTestCaseBase): requires_hardlinks = pytest.mark.skipif(not are_hardlinks_supported(), reason='hardlinks not supported') @requires_hardlinks - def test_strip_components_links(self): + @unittest.skipUnless(has_llfuse, 'llfuse not installed') + def test_mount_hardlinks(self): self._extract_hardlinks_setup() - with changedir('output'): - self.cmd('extract', self.repository_location + '::test', '--strip-components', '2') + mountpoint = os.path.join(self.tmpdir, 'mountpoint') + with self.fuse_mount(self.repository_location + '::test', mountpoint, '--strip-components=2'), \ + changedir(mountpoint): assert os.stat('hardlink').st_nlink == 2 assert os.stat('subdir/hardlink').st_nlink == 2 + assert open('subdir/hardlink', 'rb').read() == b'123456' assert os.stat('aaaa').st_nlink == 2 assert os.stat('source2').st_nlink == 2 - with changedir('output'): - self.cmd('extract', self.repository_location + '::test') + with self.fuse_mount(self.repository_location + '::test', mountpoint, 'input/dir1'), \ + changedir(mountpoint): + assert os.stat('input/dir1/hardlink').st_nlink == 2 + assert os.stat('input/dir1/subdir/hardlink').st_nlink == 2 + assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456' + assert os.stat('input/dir1/aaaa').st_nlink == 2 + assert os.stat('input/dir1/source2').st_nlink == 2 + with self.fuse_mount(self.repository_location + '::test', mountpoint), \ + changedir(mountpoint): + assert os.stat('input/source').st_nlink == 4 + assert os.stat('input/abba').st_nlink == 4 assert os.stat('input/dir1/hardlink').st_nlink == 4 + assert os.stat('input/dir1/subdir/hardlink').st_nlink == 4 + assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456' @requires_hardlinks def test_extract_hardlinks(self): self._extract_hardlinks_setup() + with changedir('output'): + self.cmd('extract', self.repository_location + '::test', '--strip-components', '2') + assert os.stat('hardlink').st_nlink == 2 + assert os.stat('subdir/hardlink').st_nlink == 2 + assert open('subdir/hardlink', 'rb').read() == b'123456' + assert os.stat('aaaa').st_nlink == 2 + assert os.stat('source2').st_nlink == 2 with changedir('output'): self.cmd('extract', self.repository_location + '::test', 'input/dir1') assert os.stat('input/dir1/hardlink').st_nlink == 2 assert os.stat('input/dir1/subdir/hardlink').st_nlink == 2 + assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456' assert os.stat('input/dir1/aaaa').st_nlink == 2 assert os.stat('input/dir1/source2').st_nlink == 2 with changedir('output'): self.cmd('extract', self.repository_location + '::test') + assert os.stat('input/source').st_nlink == 4 + assert os.stat('input/abba').st_nlink == 4 assert os.stat('input/dir1/hardlink').st_nlink == 4 + assert os.stat('input/dir1/subdir/hardlink').st_nlink == 4 + assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456' def test_extract_include_exclude(self): self.cmd('init', '--encryption=repokey', self.repository_location) @@ -2182,8 +2208,9 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.cmd('init', '--encryption=repokey', self.repository_location) self.create_regular_file('test', contents=b'first') if are_hardlinks_supported(): - self.create_regular_file('hardlink1', contents=b'') + self.create_regular_file('hardlink1', contents=b'123456') os.link('input/hardlink1', 'input/hardlink2') + os.link('input/hardlink1', 'input/hardlink3') self.cmd('create', self.repository_location + '::archive1', 'input') self.create_regular_file('test', contents=b'second') self.cmd('create', self.repository_location + '::archive2', 'input') @@ -2195,9 +2222,18 @@ class ArchiverTestCase(ArchiverTestCaseBase): assert all(f.startswith('test.') for f in files) # ... with files test.xxxxx in there assert {b'first', b'second'} == {open(os.path.join(path, f), 'rb').read() for f in files} if are_hardlinks_supported(): - st1 = os.stat(os.path.join(mountpoint, 'input', 'hardlink1', 'hardlink1.00001')) - st2 = os.stat(os.path.join(mountpoint, 'input', 'hardlink2', 'hardlink2.00001')) - assert st1.st_ino == st2.st_ino + hl1 = os.path.join(mountpoint, 'input', 'hardlink1', 'hardlink1.00001') + hl2 = os.path.join(mountpoint, 'input', 'hardlink2', 'hardlink2.00001') + hl3 = os.path.join(mountpoint, 'input', 'hardlink3', 'hardlink3.00001') + assert os.stat(hl1).st_ino == os.stat(hl2).st_ino == os.stat(hl3).st_ino + assert open(hl3, 'rb').read() == b'123456' + # similar again, but exclude the hardlink master: + with self.fuse_mount(self.repository_location, mountpoint, '-o', 'versions', '-e', 'input/hardlink1'): + if are_hardlinks_supported(): + hl2 = os.path.join(mountpoint, 'input', 'hardlink2', 'hardlink2.00001') + hl3 = os.path.join(mountpoint, 'input', 'hardlink3', 'hardlink3.00001') + assert os.stat(hl2).st_ino == os.stat(hl3).st_ino + assert open(hl3, 'rb').read() == b'123456' @unittest.skipUnless(has_llfuse, 'llfuse not installed') def test_fuse_allow_damaged_files(self):