Merge pull request #3300 from ThomasWaldmann/mount-options

borg mount: support exclusion group options and paths, fixes #2138
This commit is contained in:
TW 2017-11-23 22:43:39 +01:00 committed by GitHub
commit afc84cafd4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 111 additions and 38 deletions

View File

@@ -2325,6 +2325,9 @@ class Archiver:
subparser.add_argument('-o', dest='options', type=str,
help='Extra mount options')
define_archive_filters_group(subparser)
subparser.add_argument('paths', metavar='PATH', nargs='*', type=str,
help='paths to extract; patterns are supported')
define_exclusion_group(subparser, strip_components=True)
if parser.prog == 'borgfs':
return parser

View File

@@ -17,6 +17,7 @@ from .logger import create_logger
logger = create_logger()
from .crypto.low_level import blake2b_128
from .archiver import Archiver
from .archive import Archive
from .hashindex import FuseVersionsIndex
from .helpers import daemonize, hardlinkable, signal_handler, format_file_size
@@ -118,7 +119,7 @@ class ItemCache:
else:
raise ValueError('Invalid entry type in self.meta')
def iter_archive_items(self, archive_item_ids):
def iter_archive_items(self, archive_item_ids, filter=None, consider_part_files=False):
unpacker = msgpack.Unpacker()
# Current offset in the metadata stream, which consists of all metadata chunks glued together
@@ -161,6 +162,11 @@ class ItemCache:
# Need more data, feed the next chunk
break
item = Item(internal_dict=item)
if filter and not filter(item) or not consider_part_files and 'part' in item:
msgpacked_bytes = b''
continue
current_item = msgpacked_bytes
current_item_length = len(current_item)
current_spans_chunks = stream_offset - current_item_length < chunk_begin
@@ -197,7 +203,7 @@ class ItemCache:
inode = write_offset + self.offset
write_offset += 9
yield inode, Item(internal_dict=item)
yield inode, item
self.write_offset = write_offset
@@ -289,7 +295,21 @@ class FuseBackend(object):
t0 = time.perf_counter()
archive = Archive(self.repository_uncached, self.key, self._manifest, archive_name,
consider_part_files=self._args.consider_part_files)
for item_inode, item in self.cache.iter_archive_items(archive.metadata.items):
strip_components = self._args.strip_components
matcher = Archiver.build_matcher(self._args.patterns, self._args.paths)
partial_extract = not matcher.empty() or strip_components
hardlink_masters = {} if partial_extract else None
def peek_and_store_hardlink_masters(item, matched):
if (partial_extract and not matched and hardlinkable(item.mode) and
item.get('hardlink_master', True) and 'source' not in item):
hardlink_masters[item.get('path')] = (item.get('chunks'), None)
filter = Archiver.build_filter(matcher, peek_and_store_hardlink_masters, strip_components)
for item_inode, item in self.cache.iter_archive_items(archive.metadata.items, filter=filter,
consider_part_files=self._args.consider_part_files):
if strip_components:
item.path = os.sep.join(item.path.split(os.sep)[strip_components:])
path = os.fsencode(item.path)
is_dir = stat.S_ISDIR(item.mode)
if is_dir:
@@ -307,11 +327,16 @@ class FuseBackend(object):
parent = 1
for segment in segments[:-1]:
parent = self._process_inner(segment, parent)
self._process_leaf(segments[-1], item, parent, prefix, is_dir, item_inode)
self._process_leaf(segments[-1], item, parent, prefix, is_dir, item_inode,
hardlink_masters, strip_components)
duration = time.perf_counter() - t0
logger.debug('fuse: _process_archive completed in %.1f s for archive %s', duration, archive.name)
def _process_leaf(self, name, item, parent, prefix, is_dir, item_inode):
def _process_leaf(self, name, item, parent, prefix, is_dir, item_inode, hardlink_masters, stripped_components):
path = item.path
del item.path # save some space
hardlink_masters = hardlink_masters or {}
def file_version(item, path):
if 'chunks' in item:
file_id = blake2b_128(path)
@@ -336,35 +361,44 @@ class FuseBackend(object):
version_enc = os.fsencode('.%05d' % version)
return name + version_enc + ext
if 'source' in item and hardlinkable(item.mode):
source = os.path.join(*item.source.split(os.sep)[stripped_components:])
chunks, link_target = hardlink_masters.get(item.source, (None, source))
if link_target:
# Hard link was extracted previously, just link
link_target = os.fsencode(link_target)
if self.versions:
# adjust link target name with version
version = self.file_versions[link_target]
link_target = make_versioned_name(link_target, version, add_dir=True)
try:
inode = self.find_inode(link_target, prefix)
except KeyError:
logger.warning('Skipping broken hard link: %s -> %s', path, source)
return
item = self.get_item(inode)
item.nlink = item.get('nlink', 1) + 1
self._items[inode] = item
elif chunks is not None:
# assign chunks to this item, since the item which had the chunks was not extracted
item.chunks = chunks
inode = item_inode
self._items[inode] = item
if hardlink_masters:
# Update master entry with extracted item path, so that following hardlinks don't extract twice.
hardlink_masters[item.source] = (None, path)
else:
inode = item_inode
if self.versions and not is_dir:
parent = self._process_inner(name, parent)
path = os.fsencode(item.path)
version = file_version(item, path)
enc_path = os.fsencode(path)
version = file_version(item, enc_path)
if version is not None:
# regular file, with contents - maybe a hardlink master
name = make_versioned_name(name, version)
self.file_versions[path] = version
self.file_versions[enc_path] = version
path = item.path
del item.path # save some space
if 'source' in item and hardlinkable(item.mode):
# a hardlink, no contents, <source> is the hardlink master
source = os.fsencode(item.source)
if self.versions:
# adjust source name with version
version = self.file_versions[source]
source = make_versioned_name(source, version, add_dir=True)
name = make_versioned_name(name, version)
try:
inode = self.find_inode(source, prefix)
except KeyError:
logger.warning('Skipping broken hard link: %s -> %s', path, item.source)
return
item = self.cache.get(inode)
item.nlink = item.get('nlink', 1) + 1
self._items[inode] = item
else:
inode = item_inode
self.parent[inode] = parent
if name:
self.contents[parent][name] = inode

View File

@@ -760,7 +760,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
os.mkdir(os.path.join(self.input_path, 'dir1'))
os.mkdir(os.path.join(self.input_path, 'dir1/subdir'))
self.create_regular_file('source')
self.create_regular_file('source', contents=b'123456')
os.link(os.path.join(self.input_path, 'source'),
os.path.join(self.input_path, 'abba'))
os.link(os.path.join(self.input_path, 'source'),
@@ -778,30 +778,56 @@ class ArchiverTestCase(ArchiverTestCaseBase):
requires_hardlinks = pytest.mark.skipif(not are_hardlinks_supported(), reason='hardlinks not supported')
@requires_hardlinks
def test_strip_components_links(self):
@unittest.skipUnless(has_llfuse, 'llfuse not installed')
def test_mount_hardlinks(self):
self._extract_hardlinks_setup()
with changedir('output'):
self.cmd('extract', self.repository_location + '::test', '--strip-components', '2')
mountpoint = os.path.join(self.tmpdir, 'mountpoint')
with self.fuse_mount(self.repository_location + '::test', mountpoint, '--strip-components=2'), \
changedir(mountpoint):
assert os.stat('hardlink').st_nlink == 2
assert os.stat('subdir/hardlink').st_nlink == 2
assert open('subdir/hardlink', 'rb').read() == b'123456'
assert os.stat('aaaa').st_nlink == 2
assert os.stat('source2').st_nlink == 2
with changedir('output'):
self.cmd('extract', self.repository_location + '::test')
with self.fuse_mount(self.repository_location + '::test', mountpoint, 'input/dir1'), \
changedir(mountpoint):
assert os.stat('input/dir1/hardlink').st_nlink == 2
assert os.stat('input/dir1/subdir/hardlink').st_nlink == 2
assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456'
assert os.stat('input/dir1/aaaa').st_nlink == 2
assert os.stat('input/dir1/source2').st_nlink == 2
with self.fuse_mount(self.repository_location + '::test', mountpoint), \
changedir(mountpoint):
assert os.stat('input/source').st_nlink == 4
assert os.stat('input/abba').st_nlink == 4
assert os.stat('input/dir1/hardlink').st_nlink == 4
assert os.stat('input/dir1/subdir/hardlink').st_nlink == 4
assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456'
@requires_hardlinks
def test_extract_hardlinks(self):
self._extract_hardlinks_setup()
with changedir('output'):
self.cmd('extract', self.repository_location + '::test', '--strip-components', '2')
assert os.stat('hardlink').st_nlink == 2
assert os.stat('subdir/hardlink').st_nlink == 2
assert open('subdir/hardlink', 'rb').read() == b'123456'
assert os.stat('aaaa').st_nlink == 2
assert os.stat('source2').st_nlink == 2
with changedir('output'):
self.cmd('extract', self.repository_location + '::test', 'input/dir1')
assert os.stat('input/dir1/hardlink').st_nlink == 2
assert os.stat('input/dir1/subdir/hardlink').st_nlink == 2
assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456'
assert os.stat('input/dir1/aaaa').st_nlink == 2
assert os.stat('input/dir1/source2').st_nlink == 2
with changedir('output'):
self.cmd('extract', self.repository_location + '::test')
assert os.stat('input/source').st_nlink == 4
assert os.stat('input/abba').st_nlink == 4
assert os.stat('input/dir1/hardlink').st_nlink == 4
assert os.stat('input/dir1/subdir/hardlink').st_nlink == 4
assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456'
def test_extract_include_exclude(self):
self.cmd('init', '--encryption=repokey', self.repository_location)
@@ -2182,8 +2208,9 @@ class ArchiverTestCase(ArchiverTestCaseBase):
self.cmd('init', '--encryption=repokey', self.repository_location)
self.create_regular_file('test', contents=b'first')
if are_hardlinks_supported():
self.create_regular_file('hardlink1', contents=b'')
self.create_regular_file('hardlink1', contents=b'123456')
os.link('input/hardlink1', 'input/hardlink2')
os.link('input/hardlink1', 'input/hardlink3')
self.cmd('create', self.repository_location + '::archive1', 'input')
self.create_regular_file('test', contents=b'second')
self.cmd('create', self.repository_location + '::archive2', 'input')
@@ -2195,9 +2222,18 @@ class ArchiverTestCase(ArchiverTestCaseBase):
assert all(f.startswith('test.') for f in files) # ... with files test.xxxxx in there
assert {b'first', b'second'} == {open(os.path.join(path, f), 'rb').read() for f in files}
if are_hardlinks_supported():
st1 = os.stat(os.path.join(mountpoint, 'input', 'hardlink1', 'hardlink1.00001'))
st2 = os.stat(os.path.join(mountpoint, 'input', 'hardlink2', 'hardlink2.00001'))
assert st1.st_ino == st2.st_ino
hl1 = os.path.join(mountpoint, 'input', 'hardlink1', 'hardlink1.00001')
hl2 = os.path.join(mountpoint, 'input', 'hardlink2', 'hardlink2.00001')
hl3 = os.path.join(mountpoint, 'input', 'hardlink3', 'hardlink3.00001')
assert os.stat(hl1).st_ino == os.stat(hl2).st_ino == os.stat(hl3).st_ino
assert open(hl3, 'rb').read() == b'123456'
# similar again, but exclude the hardlink master:
with self.fuse_mount(self.repository_location, mountpoint, '-o', 'versions', '-e', 'input/hardlink1'):
if are_hardlinks_supported():
hl2 = os.path.join(mountpoint, 'input', 'hardlink2', 'hardlink2.00001')
hl3 = os.path.join(mountpoint, 'input', 'hardlink3', 'hardlink3.00001')
assert os.stat(hl2).st_ino == os.stat(hl3).st_ino
assert open(hl3, 'rb').read() == b'123456'
@unittest.skipUnless(has_llfuse, 'llfuse not installed')
def test_fuse_allow_damaged_files(self):