Let the FUSE mount argument parser accept PATH arguments and the exclusion
options (including strip-components), filter archive items while the item
cache is built, and give a hardlink the chunks of its master when the master
item itself was filtered out.

diff --git a/src/borg/archiver.py b/src/borg/archiver.py
index a8cab84be..fdeaec0f9 100644
--- a/src/borg/archiver.py
+++ b/src/borg/archiver.py
@@ -2517,6 +2517,9 @@ class Archiver:
         subparser.add_argument('-o', dest='options', type=str,
                                help='Extra mount options')
         define_archive_filters_group(subparser)
+        subparser.add_argument('paths', metavar='PATH', nargs='*', type=str,
+                               help='paths to extract; patterns are supported')
+        define_exclusion_group(subparser, strip_components=True)
         if parser.prog == 'borgfs':
             return parser
 
diff --git a/src/borg/fuse.py b/src/borg/fuse.py
index 129e3b20b..399f7d084 100644
--- a/src/borg/fuse.py
+++ b/src/borg/fuse.py
@@ -17,6 +17,7 @@ from .logger import create_logger
 logger = create_logger()
 
 from .crypto.low_level import blake2b_128
+from .archiver import Archiver
 from .archive import Archive
 from .hashindex import FuseVersionsIndex
 from .helpers import daemonize, hardlinkable, signal_handler, format_file_size
@@ -118,7 +119,7 @@ class ItemCache:
         else:
             raise ValueError('Invalid entry type in self.meta')
 
-    def iter_archive_items(self, archive_item_ids):
+    def iter_archive_items(self, archive_item_ids, filter=None, consider_part_files=False):
         unpacker = msgpack.Unpacker()
 
         # Current offset in the metadata stream, which consists of all metadata chunks glued together
@@ -161,6 +162,11 @@ class ItemCache:
                     # Need more data, feed the next chunk
                     break
 
+                item = Item(internal_dict=item)
+                if filter and not filter(item) or not consider_part_files and 'part' in item:
+                    msgpacked_bytes = b''
+                    continue
+
                 current_item = msgpacked_bytes
                 current_item_length = len(current_item)
                 current_spans_chunks = stream_offset - current_item_length < chunk_begin
@@ -197,7 +203,7 @@ class ItemCache:
                     inode = write_offset + self.offset
                     write_offset += 9
 
-                yield inode, Item(internal_dict=item)
+                yield inode, item
 
         self.write_offset = write_offset
 
@@ -325,7 +331,21 @@ class FuseOperations(llfuse.Operations):
         t0 = time.perf_counter()
         archive = Archive(self.repository_uncached, self.key, self.manifest, archive_name,
                           consider_part_files=self.args.consider_part_files)
-        for item_inode, item in self.cache.iter_archive_items(archive.metadata.items):
+        strip_components = self.args.strip_components
+        matcher = Archiver.build_matcher(self.args.patterns, self.args.paths)
+        partial_extract = not matcher.empty() or strip_components
+        hardlink_masters = {} if partial_extract else None
+
+        def peek_and_store_hardlink_masters(item, matched):
+            if (partial_extract and not matched and hardlinkable(item.mode) and
+                    item.get('hardlink_master', True) and 'source' not in item):
+                hardlink_masters[item.get('path')] = (item.get('chunks'), None)
+
+        filter = Archiver.build_filter(matcher, peek_and_store_hardlink_masters, strip_components)
+        for item_inode, item in self.cache.iter_archive_items(archive.metadata.items, filter=filter,
+                                                              consider_part_files=self.args.consider_part_files):
+            if strip_components:
+                item.path = os.sep.join(item.path.split(os.sep)[strip_components:])
             path = os.fsencode(item.path)
             is_dir = stat.S_ISDIR(item.mode)
             if is_dir:
@@ -343,11 +363,16 @@ class FuseOperations(llfuse.Operations):
             parent = 1
             for segment in segments[:-1]:
                 parent = self.process_inner(segment, parent)
-            self.process_leaf(segments[-1], item, parent, prefix, is_dir, item_inode)
+            self.process_leaf(segments[-1], item, parent, prefix, is_dir, item_inode,
+                              hardlink_masters, strip_components)
         duration = time.perf_counter() - t0
         logger.debug('fuse: process_archive completed in %.1f s for archive %s', duration, archive.name)
 
-    def process_leaf(self, name, item, parent, prefix, is_dir, item_inode):
+    def process_leaf(self, name, item, parent, prefix, is_dir, item_inode, hardlink_masters, stripped_components):
+        path = item.path
+        del item.path  # save some space
+        hardlink_masters = hardlink_masters or {}
+
         def file_version(item, path):
             if 'chunks' in item:
                 file_id = blake2b_128(path)
@@ -372,35 +397,45 @@ class FuseOperations(llfuse.Operations):
             version_enc = os.fsencode('.%05d' % version)
             return name + version_enc + ext
 
+        if 'source' in item and hardlinkable(item.mode):
+            # join with os.sep: unlike os.path.join(*parts) this tolerates an empty component list
+            source = os.sep.join(item.source.split(os.sep)[stripped_components:])
+            chunks, link_target = hardlink_masters.get(item.source, (None, source))
+            if link_target:
+                # Hard link was extracted previously, just link
+                link_target = os.fsencode(link_target)
+                if self.versions:
+                    # adjust link target name with version
+                    version = self.file_versions[link_target]
+                    link_target = make_versioned_name(link_target, version, add_dir=True)
+                try:
+                    inode = self._find_inode(link_target, prefix)
+                except KeyError:
+                    logger.warning('Skipping broken hard link: %s -> %s', path, source)
+                    return
+                item = self.get_item(inode)
+                item.nlink = item.get('nlink', 1) + 1
+                self.items[inode] = item
+            elif chunks is not None:
+                # assign chunks to this item, since the item which had the chunks was not extracted
+                item.chunks = chunks
+                inode = item_inode
+                self.items[inode] = item
+                if hardlink_masters:
+                    # Update master entry with extracted item path, so that following hardlinks don't extract twice.
+                    hardlink_masters[item.source] = (None, path)
+        else:
+            inode = item_inode
+
         if self.versions and not is_dir:
             parent = self.process_inner(name, parent)
-            path = os.fsencode(item.path)
-            version = file_version(item, path)
+            enc_path = os.fsencode(path)
+            version = file_version(item, enc_path)
             if version is not None:
                 # regular file, with contents - maybe a hardlink master
                 name = make_versioned_name(name, version)
-                self.file_versions[path] = version
+                self.file_versions[enc_path] = version
 
-        path = item.path
-        del item.path  # save some space
-        if 'source' in item and hardlinkable(item.mode):
-            # a hardlink, no contents, is the hardlink master
-            source = os.fsencode(item.source)
-            if self.versions:
-                # adjust source name with version
-                version = self.file_versions[source]
-                source = make_versioned_name(source, version, add_dir=True)
-                name = make_versioned_name(name, version)
-            try:
-                inode = self._find_inode(source, prefix)
-            except KeyError:
-                logger.warning('Skipping broken hard link: %s -> %s', path, item.source)
-                return
-            item = self.cache.get(inode)
-            item.nlink = item.get('nlink', 1) + 1
-            self.items[inode] = item
-        else:
-            inode = item_inode
         self.parent[inode] = parent
         if name:
             self.contents[parent][name] = inode
diff --git a/src/borg/testsuite/archiver.py b/src/borg/testsuite/archiver.py
index 1c2e83708..d4f716f6c 100644
--- a/src/borg/testsuite/archiver.py
+++ b/src/borg/testsuite/archiver.py
@@ -759,7 +759,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         os.mkdir(os.path.join(self.input_path, 'dir1'))
         os.mkdir(os.path.join(self.input_path, 'dir1/subdir'))
 
-        self.create_regular_file('source')
+        self.create_regular_file('source', contents=b'123456')
         os.link(os.path.join(self.input_path, 'source'),
                 os.path.join(self.input_path, 'abba'))
         os.link(os.path.join(self.input_path, 'source'),
@@ -777,30 +777,56 @@ class ArchiverTestCase(ArchiverTestCaseBase):
     requires_hardlinks = pytest.mark.skipif(not are_hardlinks_supported(), reason='hardlinks not supported')
 
     @requires_hardlinks
-    def test_strip_components_links(self):
+    @unittest.skipUnless(has_llfuse, 'llfuse not installed')
+    def test_mount_hardlinks(self):
         self._extract_hardlinks_setup()
-        with changedir('output'):
-            self.cmd('extract', self.repository_location + '::test', '--strip-components', '2')
+        mountpoint = os.path.join(self.tmpdir, 'mountpoint')
+        with self.fuse_mount(self.repository_location + '::test', mountpoint, '--strip-components=2'), \
+             changedir(mountpoint):
             assert os.stat('hardlink').st_nlink == 2
             assert os.stat('subdir/hardlink').st_nlink == 2
+            assert open('subdir/hardlink', 'rb').read() == b'123456'
             assert os.stat('aaaa').st_nlink == 2
             assert os.stat('source2').st_nlink == 2
-        with changedir('output'):
-            self.cmd('extract', self.repository_location + '::test')
+        with self.fuse_mount(self.repository_location + '::test', mountpoint, 'input/dir1'), \
+             changedir(mountpoint):
+            assert os.stat('input/dir1/hardlink').st_nlink == 2
+            assert os.stat('input/dir1/subdir/hardlink').st_nlink == 2
+            assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456'
+            assert os.stat('input/dir1/aaaa').st_nlink == 2
+            assert os.stat('input/dir1/source2').st_nlink == 2
+        with self.fuse_mount(self.repository_location + '::test', mountpoint), \
+             changedir(mountpoint):
+            assert os.stat('input/source').st_nlink == 4
+            assert os.stat('input/abba').st_nlink == 4
             assert os.stat('input/dir1/hardlink').st_nlink == 4
+            assert os.stat('input/dir1/subdir/hardlink').st_nlink == 4
+            assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456'
 
     @requires_hardlinks
     def test_extract_hardlinks(self):
         self._extract_hardlinks_setup()
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test', '--strip-components', '2')
+            assert os.stat('hardlink').st_nlink == 2
+            assert os.stat('subdir/hardlink').st_nlink == 2
+            assert open('subdir/hardlink', 'rb').read() == b'123456'
+            assert os.stat('aaaa').st_nlink == 2
+            assert os.stat('source2').st_nlink == 2
         with changedir('output'):
             self.cmd('extract', self.repository_location + '::test', 'input/dir1')
             assert os.stat('input/dir1/hardlink').st_nlink == 2
             assert os.stat('input/dir1/subdir/hardlink').st_nlink == 2
+            assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456'
             assert os.stat('input/dir1/aaaa').st_nlink == 2
             assert os.stat('input/dir1/source2').st_nlink == 2
         with changedir('output'):
             self.cmd('extract', self.repository_location + '::test')
+            assert os.stat('input/source').st_nlink == 4
+            assert os.stat('input/abba').st_nlink == 4
             assert os.stat('input/dir1/hardlink').st_nlink == 4
+            assert os.stat('input/dir1/subdir/hardlink').st_nlink == 4
+            assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456'
 
     def test_extract_include_exclude(self):
         self.cmd('init', '--encryption=repokey', self.repository_location)
@@ -2182,8 +2208,9 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.cmd('init', '--encryption=repokey', self.repository_location)
         self.create_regular_file('test', contents=b'first')
         if are_hardlinks_supported():
-            self.create_regular_file('hardlink1', contents=b'')
+            self.create_regular_file('hardlink1', contents=b'123456')
             os.link('input/hardlink1', 'input/hardlink2')
+            os.link('input/hardlink1', 'input/hardlink3')
         self.cmd('create', self.repository_location + '::archive1', 'input')
         self.create_regular_file('test', contents=b'second')
         self.cmd('create', self.repository_location + '::archive2', 'input')
@@ -2195,9 +2222,18 @@ class ArchiverTestCase(ArchiverTestCaseBase):
             assert all(f.startswith('test.') for f in files)  # ... with files test.xxxxx in there
             assert {b'first', b'second'} == {open(os.path.join(path, f), 'rb').read() for f in files}
             if are_hardlinks_supported():
-                st1 = os.stat(os.path.join(mountpoint, 'input', 'hardlink1', 'hardlink1.00001'))
-                st2 = os.stat(os.path.join(mountpoint, 'input', 'hardlink2', 'hardlink2.00001'))
-                assert st1.st_ino == st2.st_ino
+                hl1 = os.path.join(mountpoint, 'input', 'hardlink1', 'hardlink1.00001')
+                hl2 = os.path.join(mountpoint, 'input', 'hardlink2', 'hardlink2.00001')
+                hl3 = os.path.join(mountpoint, 'input', 'hardlink3', 'hardlink3.00001')
+                assert os.stat(hl1).st_ino == os.stat(hl2).st_ino == os.stat(hl3).st_ino
+                assert open(hl3, 'rb').read() == b'123456'
+        # similar again, but exclude the hardlink master:
+        with self.fuse_mount(self.repository_location, mountpoint, '-o', 'versions', '-e', 'input/hardlink1'):
+            if are_hardlinks_supported():
+                hl2 = os.path.join(mountpoint, 'input', 'hardlink2', 'hardlink2.00001')
+                hl3 = os.path.join(mountpoint, 'input', 'hardlink3', 'hardlink3.00001')
+                assert os.stat(hl2).st_ino == os.stat(hl3).st_ino
+                assert open(hl3, 'rb').read() == b'123456'
 
     @unittest.skipUnless(has_llfuse, 'llfuse not installed')
     def test_fuse_allow_damaged_files(self):