From 52410b6976202680ac31abea683424a17b779f99 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 7 Nov 2017 17:45:22 +0100 Subject: [PATCH 1/3] borg mount: support exclusion group options and paths, fixes #2138 borg mount [options] repo_or_archive mountpoint path [paths...] paths: you can just give some "root paths" (like for borg extract) to only partially populate the FUSE filesystem. Similar for these exclusion group options: --exclude --exclude-from --pattern --patterns-from --strip-components (cherry picked from commit 77df1cfe8c01eae932631fa7873b05b99b6b6b55) --- src/borg/archiver.py | 3 +++ src/borg/fuse.py | 18 +++++++++++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/src/borg/archiver.py b/src/borg/archiver.py index a8cab84be..fdeaec0f9 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -2517,6 +2517,9 @@ class Archiver: subparser.add_argument('-o', dest='options', type=str, help='Extra mount options') define_archive_filters_group(subparser) + subparser.add_argument('paths', metavar='PATH', nargs='*', type=str, + help='paths to extract; patterns are supported') + define_exclusion_group(subparser, strip_components=True) if parser.prog == 'borgfs': return parser diff --git a/src/borg/fuse.py b/src/borg/fuse.py index 129e3b20b..3bc82eef0 100644 --- a/src/borg/fuse.py +++ b/src/borg/fuse.py @@ -17,6 +17,7 @@ from .logger import create_logger logger = create_logger() from .crypto.low_level import blake2b_128 +from .archiver import Archiver from .archive import Archive from .hashindex import FuseVersionsIndex from .helpers import daemonize, hardlinkable, signal_handler, format_file_size @@ -118,7 +119,7 @@ class ItemCache: else: raise ValueError('Invalid entry type in self.meta') - def iter_archive_items(self, archive_item_ids): + def iter_archive_items(self, archive_item_ids, filter=None): unpacker = msgpack.Unpacker() # Current offset in the metadata stream, which consists of all metadata chunks glued together @@ -161,6 +162,11 @@ class ItemCache: # Need more data, feed the next chunk break + item = Item(internal_dict=item) + if filter and not filter(item): + msgpacked_bytes = b'' + continue + current_item = msgpacked_bytes current_item_length = len(current_item) current_spans_chunks = stream_offset - current_item_length < chunk_begin @@ -197,7 +203,7 @@ class ItemCache: inode = write_offset + self.offset write_offset += 9 - yield inode, Item(internal_dict=item) + yield inode, item self.write_offset = write_offset @@ -325,7 +331,13 @@ class FuseOperations(llfuse.Operations): t0 = time.perf_counter() archive = Archive(self.repository_uncached, self.key, self.manifest, archive_name, consider_part_files=self.args.consider_part_files) - for item_inode, item in self.cache.iter_archive_items(archive.metadata.items): + strip_components = self.args.strip_components + matcher = Archiver.build_matcher(self.args.patterns, self.args.paths) + dummy = lambda x, y: None # TODO: add hardlink_master support code, see Archiver + filter = Archiver.build_filter(matcher, dummy, strip_components) + for item_inode, item in self.cache.iter_archive_items(archive.metadata.items, filter=filter): + if strip_components: + item.path = os.sep.join(item.path.split(os.sep)[strip_components:]) path = os.fsencode(item.path) is_dir = stat.S_ISDIR(item.mode) if is_dir: From 0e0764757688bd0085491c26b2f2c58ac5fa8028 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 21 Nov 2017 01:20:57 +0100 Subject: [PATCH 2/3] borg mount: support hardlinks correctly, add tests previous commit did not yet support hardlinks correctly, if the hardlink master was excluded somehow. added some tests for this, also refactored related tests slightly. (cherry picked from commit e97deafb16ac44450b542e4e23d57556631f647b) --- src/borg/fuse.py | 75 ++++++++++++++++++++++------------ src/borg/testsuite/archiver.py | 56 ++++++++++++++++++++----- 2 files changed, 94 insertions(+), 37 deletions(-) diff --git a/src/borg/fuse.py b/src/borg/fuse.py index 3bc82eef0..e06651fa6 100644 --- a/src/borg/fuse.py +++ b/src/borg/fuse.py @@ -333,8 +333,15 @@ class FuseOperations(llfuse.Operations): consider_part_files=self.args.consider_part_files) strip_components = self.args.strip_components matcher = Archiver.build_matcher(self.args.patterns, self.args.paths) - dummy = lambda x, y: None # TODO: add hardlink_master support code, see Archiver - filter = Archiver.build_filter(matcher, dummy, strip_components) + partial_extract = not matcher.empty() or strip_components + hardlink_masters = {} if partial_extract else None + + def peek_and_store_hardlink_masters(item, matched): + if (partial_extract and not matched and hardlinkable(item.mode) and + item.get('hardlink_master', True) and 'source' not in item): + hardlink_masters[item.get('path')] = (item.get('chunks'), None) + + filter = Archiver.build_filter(matcher, peek_and_store_hardlink_masters, strip_components) for item_inode, item in self.cache.iter_archive_items(archive.metadata.items, filter=filter): if strip_components: item.path = os.sep.join(item.path.split(os.sep)[strip_components:]) @@ -355,11 +362,16 @@ class FuseOperations(llfuse.Operations): parent = 1 for segment in segments[:-1]: parent = self.process_inner(segment, parent) - self.process_leaf(segments[-1], item, parent, prefix, is_dir, item_inode) + self.process_leaf(segments[-1], item, parent, prefix, is_dir, item_inode, + hardlink_masters, strip_components) duration = time.perf_counter() - t0 logger.debug('fuse: process_archive completed in %.1f s for archive %s', duration, archive.name) - def process_leaf(self, name, item, parent, prefix, is_dir, item_inode): + def process_leaf(self, name, item, parent, prefix, is_dir, item_inode, hardlink_masters, stripped_components): + path = item.path + del item.path # save some space + hardlink_masters = hardlink_masters or {} + def file_version(item, path): if 'chunks' in item: file_id = blake2b_128(path) @@ -384,35 +396,44 @@ class FuseOperations(llfuse.Operations): version_enc = os.fsencode('.%05d' % version) return name + version_enc + ext + if 'source' in item and hardlinkable(item.mode): + source = os.path.join(*item.source.split(os.sep)[stripped_components:]) + chunks, link_target = hardlink_masters.get(item.source, (None, source)) + if link_target: + # Hard link was extracted previously, just link + link_target = os.fsencode(link_target) + if self.versions: + # adjust link target name with version + version = self.file_versions[link_target] + link_target = make_versioned_name(link_target, version, add_dir=True) + try: + inode = self._find_inode(link_target, prefix) + except KeyError: + logger.warning('Skipping broken hard link: %s -> %s', path, source) + return + item = self.get_item(inode) + item.nlink = item.get('nlink', 1) + 1 + self.items[inode] = item + elif chunks is not None: + # assign chunks to this item, since the item which had the chunks was not extracted + item.chunks = chunks + inode = item_inode + self.items[inode] = item + if hardlink_masters: + # Update master entry with extracted item path, so that following hardlinks don't extract twice. + hardlink_masters[item.source] = (None, path) + else: + inode = item_inode + if self.versions and not is_dir: parent = self.process_inner(name, parent) - path = os.fsencode(item.path) - version = file_version(item, path) + enc_path = os.fsencode(path) + version = file_version(item, enc_path) if version is not None: # regular file, with contents - maybe a hardlink master name = make_versioned_name(name, version) - self.file_versions[path] = version + self.file_versions[enc_path] = version - path = item.path - del item.path # save some space - if 'source' in item and hardlinkable(item.mode): - # a hardlink, no contents, is the hardlink master - source = os.fsencode(item.source) - if self.versions: - # adjust source name with version - version = self.file_versions[source] - source = make_versioned_name(source, version, add_dir=True) - name = make_versioned_name(name, version) - try: - inode = self._find_inode(source, prefix) - except KeyError: - logger.warning('Skipping broken hard link: %s -> %s', path, item.source) - return - item = self.cache.get(inode) - item.nlink = item.get('nlink', 1) + 1 - self.items[inode] = item - else: - inode = item_inode self.parent[inode] = parent if name: self.contents[parent][name] = inode diff --git a/src/borg/testsuite/archiver.py b/src/borg/testsuite/archiver.py index 1c2e83708..d4f716f6c 100644 --- a/src/borg/testsuite/archiver.py +++ b/src/borg/testsuite/archiver.py @@ -759,7 +759,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): os.mkdir(os.path.join(self.input_path, 'dir1')) os.mkdir(os.path.join(self.input_path, 'dir1/subdir')) - self.create_regular_file('source') + self.create_regular_file('source', contents=b'123456') os.link(os.path.join(self.input_path, 'source'), os.path.join(self.input_path, 'abba')) os.link(os.path.join(self.input_path, 'source'), @@ -777,30 +777,56 @@ class ArchiverTestCase(ArchiverTestCaseBase): requires_hardlinks = pytest.mark.skipif(not are_hardlinks_supported(), reason='hardlinks not supported') @requires_hardlinks - def test_strip_components_links(self): + @unittest.skipUnless(has_llfuse, 'llfuse not installed') + def test_mount_hardlinks(self): self._extract_hardlinks_setup() - with changedir('output'): - self.cmd('extract', self.repository_location + '::test', '--strip-components', '2') + mountpoint = os.path.join(self.tmpdir, 'mountpoint') + with self.fuse_mount(self.repository_location + '::test', mountpoint, '--strip-components=2'), \ + changedir(mountpoint): assert os.stat('hardlink').st_nlink == 2 assert os.stat('subdir/hardlink').st_nlink == 2 + assert open('subdir/hardlink', 'rb').read() == b'123456' assert os.stat('aaaa').st_nlink == 2 assert os.stat('source2').st_nlink == 2 - with changedir('output'): - self.cmd('extract', self.repository_location + '::test') + with self.fuse_mount(self.repository_location + '::test', mountpoint, 'input/dir1'), \ + changedir(mountpoint): + assert os.stat('input/dir1/hardlink').st_nlink == 2 + assert os.stat('input/dir1/subdir/hardlink').st_nlink == 2 + assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456' + assert os.stat('input/dir1/aaaa').st_nlink == 2 + assert os.stat('input/dir1/source2').st_nlink == 2 + with self.fuse_mount(self.repository_location + '::test', mountpoint), \ + changedir(mountpoint): + assert os.stat('input/source').st_nlink == 4 + assert os.stat('input/abba').st_nlink == 4 assert os.stat('input/dir1/hardlink').st_nlink == 4 + assert os.stat('input/dir1/subdir/hardlink').st_nlink == 4 + assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456' @requires_hardlinks def test_extract_hardlinks(self): self._extract_hardlinks_setup() + with changedir('output'): + self.cmd('extract', self.repository_location + '::test', '--strip-components', '2') + assert os.stat('hardlink').st_nlink == 2 + assert os.stat('subdir/hardlink').st_nlink == 2 + assert open('subdir/hardlink', 'rb').read() == b'123456' + assert os.stat('aaaa').st_nlink == 2 + assert os.stat('source2').st_nlink == 2 with changedir('output'): self.cmd('extract', self.repository_location + '::test', 'input/dir1') assert os.stat('input/dir1/hardlink').st_nlink == 2 assert os.stat('input/dir1/subdir/hardlink').st_nlink == 2 + assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456' assert os.stat('input/dir1/aaaa').st_nlink == 2 assert os.stat('input/dir1/source2').st_nlink == 2 with changedir('output'): self.cmd('extract', self.repository_location + '::test') + assert os.stat('input/source').st_nlink == 4 + assert os.stat('input/abba').st_nlink == 4 assert os.stat('input/dir1/hardlink').st_nlink == 4 + assert os.stat('input/dir1/subdir/hardlink').st_nlink == 4 + assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456' def test_extract_include_exclude(self): self.cmd('init', '--encryption=repokey', self.repository_location) @@ -2182,8 +2208,9 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.cmd('init', '--encryption=repokey', self.repository_location) self.create_regular_file('test', contents=b'first') if are_hardlinks_supported(): - self.create_regular_file('hardlink1', contents=b'') + self.create_regular_file('hardlink1', contents=b'123456') os.link('input/hardlink1', 'input/hardlink2') + os.link('input/hardlink1', 'input/hardlink3') self.cmd('create', self.repository_location + '::archive1', 'input') self.create_regular_file('test', contents=b'second') self.cmd('create', self.repository_location + '::archive2', 'input') @@ -2195,9 +2222,18 @@ class ArchiverTestCase(ArchiverTestCaseBase): assert all(f.startswith('test.') for f in files) # ... with files test.xxxxx in there assert {b'first', b'second'} == {open(os.path.join(path, f), 'rb').read() for f in files} if are_hardlinks_supported(): - st1 = os.stat(os.path.join(mountpoint, 'input', 'hardlink1', 'hardlink1.00001')) - st2 = os.stat(os.path.join(mountpoint, 'input', 'hardlink2', 'hardlink2.00001')) - assert st1.st_ino == st2.st_ino + hl1 = os.path.join(mountpoint, 'input', 'hardlink1', 'hardlink1.00001') + hl2 = os.path.join(mountpoint, 'input', 'hardlink2', 'hardlink2.00001') + hl3 = os.path.join(mountpoint, 'input', 'hardlink3', 'hardlink3.00001') + assert os.stat(hl1).st_ino == os.stat(hl2).st_ino == os.stat(hl3).st_ino + assert open(hl3, 'rb').read() == b'123456' + # similar again, but exclude the hardlink master: + with self.fuse_mount(self.repository_location, mountpoint, '-o', 'versions', '-e', 'input/hardlink1'): + if are_hardlinks_supported(): + hl2 = os.path.join(mountpoint, 'input', 'hardlink2', 'hardlink2.00001') + hl3 = os.path.join(mountpoint, 'input', 'hardlink3', 'hardlink3.00001') + assert os.stat(hl2).st_ino == os.stat(hl3).st_ino + assert open(hl3, 'rb').read() == b'123456' @unittest.skipUnless(has_llfuse, 'llfuse not installed') def test_fuse_allow_damaged_files(self): From 47d16e0f6251802d643b57e8b839329e999b5b99 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 22 Nov 2017 15:08:08 +0100 Subject: [PATCH 3/3] borg mount: support --consider-part-files correctly, fixes #3347 (cherry picked from commit caece370b8931751195f439f5a5c08baa97d65f9) --- src/borg/fuse.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/borg/fuse.py b/src/borg/fuse.py index e06651fa6..399f7d084 100644 --- a/src/borg/fuse.py +++ b/src/borg/fuse.py @@ -119,7 +119,7 @@ class ItemCache: else: raise ValueError('Invalid entry type in self.meta') - def iter_archive_items(self, archive_item_ids, filter=None): + def iter_archive_items(self, archive_item_ids, filter=None, consider_part_files=False): unpacker = msgpack.Unpacker() # Current offset in the metadata stream, which consists of all metadata chunks glued together @@ -163,7 +163,7 @@ class ItemCache: break item = Item(internal_dict=item) - if filter and not filter(item): + if filter and not filter(item) or not consider_part_files and 'part' in item: msgpacked_bytes = b'' continue @@ -342,7 +342,8 @@ class FuseOperations(llfuse.Operations): hardlink_masters[item.get('path')] = (item.get('chunks'), None) filter = Archiver.build_filter(matcher, peek_and_store_hardlink_masters, strip_components) - for item_inode, item in self.cache.iter_archive_items(archive.metadata.items, filter=filter): + for item_inode, item in self.cache.iter_archive_items(archive.metadata.items, filter=filter, + consider_part_files=self.args.consider_part_files): if strip_components: item.path = os.sep.join(item.path.split(os.sep)[strip_components:]) path = os.fsencode(item.path)