borg extract: fix links failing for sub-tree extractions

fixes #761
Marian Beermann 2016-03-17 22:39:57 +01:00
parent 220d44b2b8
commit 9211d0fa18
4 changed files with 109 additions and 20 deletions
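Background: extract_item() used to create hard links with os.link(os.path.join(dest, item[b'source']), path). When only a subtree is extracted (include patterns or --strip-components), the link's "master" item, the one that carries the chunks, may lie outside the extracted set or at a different stripped path, so os.link() failed. The change below records candidate masters while iterating; for the first hardlink whose master was not extracted it writes the content from the master's chunks, then remembers the on-disk path so later links to the same master become real hard links again. A minimal self-contained sketch of that strategy (plain dicts and in-memory byte-string chunks; extract_subtree() is a hypothetical name, not borg's API):

import os

def extract_subtree(items, match, out_dir):
    """Sketch of the fixed algorithm; items must come master-first, as in an archive."""
    hardlink_masters = {}  # archive path -> (chunks, on-disk link target)
    for item in items:
        if 'source' not in item and item.get('nlink', 1) > 1:
            # potential master: remember its chunks, even if it is filtered out
            hardlink_masters[item['path']] = (item['chunks'], None)
        if not match(item['path']):
            continue
        # flatten paths so the sketch needs no directory handling
        path = os.path.join(out_dir, item['path'].replace('/', '_'))
        if 'source' in item:
            chunks, link_target = hardlink_masters[item['source']]
            if link_target is not None:
                os.link(link_target, path)        # master already extracted
                continue
            item = dict(item, chunks=chunks)      # master skipped: extract data here
        with open(path, 'wb') as fd:
            for data in item['chunks']:
                fd.write(data)
        # record the landing spot so later links to the same master just link
        hardlink_masters[item.get('source', item['path'])] = (None, path)

# usage: the master lies outside the matched subtree, yet data still arrives
items = [
    {'path': 'input/source', 'nlink': 2, 'chunks': [b'data']},
    {'path': 'input/dir1/hardlink', 'nlink': 2, 'source': 'input/source'},
]
os.makedirs('output', exist_ok=True)
extract_subtree(items, lambda p: p.startswith('input/dir1'), 'output')

Re-extracting the chunks for the first link seen keeps partial extracts complete, at the cost of holding each candidate master's chunk list in memory for the duration of the extract.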

borg/archive.py

@@ -298,7 +298,19 @@ Number of files: {0.stats.nfiles}'''.format(
         cache.rollback()
         return stats

-    def extract_item(self, item, restore_attrs=True, dry_run=False, stdout=False, sparse=False):
+    def extract_item(self, item, restore_attrs=True, dry_run=False, stdout=False, sparse=False,
+                     hardlink_masters=None, original_path=None):
+        """
+        Extract archive item.
+
+        :param item: the item to extract
+        :param restore_attrs: restore file attributes
+        :param dry_run: do not write any data
+        :param stdout: write extracted data to stdout
+        :param sparse: write sparse files (chunk-granularity, independent of the original being sparse)
+        :param hardlink_masters: maps paths to (chunks, link_target) for extracting subtrees with hardlinks correctly
+        :param original_path: b'path' key as stored in archive
+        """
         if dry_run or stdout:
             if b'chunks' in item:
                 for data in self.pipeline.fetch_many([c[0] for c in item[b'chunks']], is_preloaded=True):
@@ -308,6 +320,7 @@ Number of files: {0.stats.nfiles}'''.format(
                     sys.stdout.buffer.flush()
             return

+        original_path = original_path or item[b'path']
         dest = self.cwd
         if item[b'path'].startswith('/') or item[b'path'].startswith('..'):
             raise Exception('Path should be relative and local')
@@ -327,25 +340,36 @@ Number of files: {0.stats.nfiles}'''.format(
         if stat.S_ISREG(mode):
             if not os.path.exists(os.path.dirname(path)):
                 os.makedirs(os.path.dirname(path))
             # Hard link?
             if b'source' in item:
                 source = os.path.join(dest, item[b'source'])
                 if os.path.exists(path):
                     os.unlink(path)
-                os.link(source, path)
-            else:
-                with open(path, 'wb') as fd:
-                    ids = [c[0] for c in item[b'chunks']]
-                    for data in self.pipeline.fetch_many(ids, is_preloaded=True):
-                        if sparse and self.zeros.startswith(data):
-                            # all-zero chunk: create a hole in a sparse file
-                            fd.seek(len(data), 1)
-                        else:
-                            fd.write(data)
-                    pos = fd.tell()
-                    fd.truncate(pos)
-                    fd.flush()
-                    self.restore_attrs(path, item, fd=fd.fileno())
+                if not hardlink_masters:
+                    os.link(source, path)
+                    return
+                item[b'chunks'], link_target = hardlink_masters[item[b'source']]
+                if link_target:
+                    # Hard link was extracted previously, just link
+                    os.link(link_target, path)
+                    return
+                # Extract chunks, since the item which had the chunks was not extracted
+            with open(path, 'wb') as fd:
+                ids = [c[0] for c in item[b'chunks']]
+                for data in self.pipeline.fetch_many(ids, is_preloaded=True):
+                    if sparse and self.zeros.startswith(data):
+                        # all-zero chunk: create a hole in a sparse file
+                        fd.seek(len(data), 1)
+                    else:
+                        fd.write(data)
+                pos = fd.tell()
+                fd.truncate(pos)
+                fd.flush()
+                self.restore_attrs(path, item, fd=fd.fileno())
+            if hardlink_masters:
+                # Update master entry with extracted file path, so that following hardlinks don't extract twice.
+                hardlink_masters[item.get(b'source') or original_path] = (None, path)
         elif stat.S_ISDIR(mode):
             if not os.path.exists(path):
                 os.makedirs(path)
@@ -527,7 +551,10 @@ Number of files: {0.stats.nfiles}'''.format(
            source = self.hard_links.get((st.st_ino, st.st_dev))
            if (st.st_ino, st.st_dev) in self.hard_links:
                item = self.stat_attrs(st, path)
-               item.update({b'path': safe_path, b'source': source})
+               item.update({
+                   b'path': safe_path,
+                   b'source': source,
+               })
                self.add_item(item)
                status = 'h'  # regular file, hardlink (to already seen inodes)
                return status
@@ -549,7 +576,10 @@ Number of files: {0.stats.nfiles}'''.format(
                status = 'U'  # regular file, unchanged
        else:
            status = 'A'  # regular file, added
-       item = {b'path': safe_path}
+       item = {
+           b'path': safe_path,
+           b'hardlink_master': st.st_nlink > 1,  # item is a hard link and has the chunks
+       }
        # Only chunkify the file if needed
        if chunks is None:
            fh = Archive._open_rb(path)
@@ -587,7 +617,7 @@ Number of files: {0.stats.nfiles}'''.format(
 # this set must be kept complete, otherwise the RobustUnpacker might malfunction:
-ITEM_KEYS = set([b'path', b'source', b'rdev', b'chunks',
+ITEM_KEYS = set([b'path', b'source', b'rdev', b'chunks', b'hardlink_master',
                 b'mode', b'user', b'group', b'uid', b'gid', b'mtime', b'atime', b'ctime',
                 b'xattrs', b'bsdflags', b'acl_nfs4', b'acl_access', b'acl_default', b'acl_extended', ])
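For reference, the create side decides what the new b'hardlink_master' flag means: the first regular file seen for an inode with st_nlink > 1 keeps the chunks and is flagged as a master; later occurrences of the same inode store only b'source'. A rough stand-alone illustration of that classification (classify() and the hard_links dict shape are assumptions for this sketch, not borg's actual code):

import os
import stat

def classify(path, hard_links):
    # hard_links maps (st_ino, st_dev) -> first path seen for that inode
    st = os.lstat(path)
    if not stat.S_ISREG(st.st_mode):
        return {'path': path}
    key = (st.st_ino, st.st_dev)
    if st.st_nlink > 1 and key in hard_links:
        # later occurrence: store only a reference to the master's path
        return {'path': path, 'source': hard_links[key]}
    if st.st_nlink > 1:
        hard_links[key] = path
    # first occurrence carries the chunks; flag it if links may follow
    return {'path': path, 'hardlink_master': st.st_nlink > 1}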

borg/archiver.py

@@ -353,8 +353,20 @@ class Archiver:
         sparse = args.sparse
         strip_components = args.strip_components
         dirs = []
-        for item in archive.iter_items(lambda item: matcher.match(item[b'path']), preload=True):
+        partial_extract = not matcher.empty() or strip_components
+        hardlink_masters = {} if partial_extract else None
+
+        def item_is_hardlink_master(item):
+            return (partial_extract and stat.S_ISREG(item[b'mode']) and
+                    item.get(b'hardlink_master', True) and b'source' not in item)
+
+        for item in archive.iter_items(preload=True,
+                                       filter=lambda item: item_is_hardlink_master(item) or matcher.match(item[b'path'])):
             orig_path = item[b'path']
+            if item_is_hardlink_master(item):
+                hardlink_masters[orig_path] = (item.get(b'chunks'), item.get(b'source'))
+            if not matcher.match(item[b'path']):
+                continue
             if strip_components:
                 item[b'path'] = os.sep.join(orig_path.split(os.sep)[strip_components:])
                 if not item[b'path']:
@@ -372,7 +384,8 @@ class Archiver:
                         dirs.append(item)
                         archive.extract_item(item, restore_attrs=False)
                     else:
-                        archive.extract_item(item, stdout=stdout, sparse=sparse)
+                        archive.extract_item(item, stdout=stdout, sparse=sparse, hardlink_masters=hardlink_masters,
+                                             original_path=orig_path)
             except OSError as e:
                 self.print_warning('%s: %s', remove_surrogates(orig_path), e)
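Note the True default in item.get(b'hardlink_master', True): archives created before this change carry no such flag, so any regular file that is not itself a link has to be conservatively treated as a potential master. A quick illustration of the fallback (the item dicts are made up for the example):

# Backwards compatibility of the flag (the False case assumes an archive
# written by a borg that includes this change):
old_item = {b'path': b'f'}                             # pre-change archive: no flag
new_item = {b'path': b'f', b'hardlink_master': False}  # st_nlink == 1 at create time

assert old_item.get(b'hardlink_master', True) is True   # track it, just in case
assert new_item.get(b'hardlink_master', True) is False  # skip the bookkeeping

The cost of the conservative default is only bookkeeping: chunk lists of masters that no link ever references are recorded and then simply never looked up.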

borg/helpers.py

@@ -286,6 +286,9 @@ class PatternMatcher:
         # Value to return from match function when none of the patterns match.
         self.fallback = fallback

+    def empty(self):
+        return not len(self._items)
+
     def add(self, patterns, value):
         """Add list of patterns to internal list. The given value is returned from the match function when one of the
         given patterns matches.
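do_extract uses the new empty() to tell a full extraction (no patterns registered) from a potentially partial one. A small usage sketch, assuming FnmatchPattern is one of the pattern classes available in borg.helpers:

from borg.helpers import PatternMatcher, FnmatchPattern  # FnmatchPattern: assumption

matcher = PatternMatcher(fallback=True)
assert matcher.empty()      # no patterns: a plain full extraction

matcher.add([FnmatchPattern('input/dir1')], True)
assert not matcher.empty()  # patterns present: extraction may be partial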

borg/testsuite/archiver.py

@@ -467,6 +467,49 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         with self.assert_creates_file('input/dir/file'):
             self.cmd('extract', self.repository_location + '::test', '--strip-components', '0')

+    def _extract_hardlinks_setup(self):
+        os.mkdir(os.path.join(self.input_path, 'dir1'))
+        os.mkdir(os.path.join(self.input_path, 'dir1/subdir'))
+
+        self.create_regular_file('source')
+        os.link(os.path.join(self.input_path, 'source'),
+                os.path.join(self.input_path, 'abba'))
+        os.link(os.path.join(self.input_path, 'source'),
+                os.path.join(self.input_path, 'dir1/hardlink'))
+        os.link(os.path.join(self.input_path, 'source'),
+                os.path.join(self.input_path, 'dir1/subdir/hardlink'))
+
+        self.create_regular_file('dir1/source2')
+        os.link(os.path.join(self.input_path, 'dir1/source2'),
+                os.path.join(self.input_path, 'dir1/aaaa'))
+
+        self.cmd('init', self.repository_location)
+        self.cmd('create', self.repository_location + '::test', 'input')
+
+    def test_strip_components_links(self):
+        self._extract_hardlinks_setup()
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test', '--strip-components', '2')
+            assert os.stat('hardlink').st_nlink == 2
+            assert os.stat('subdir/hardlink').st_nlink == 2
+            assert os.stat('aaaa').st_nlink == 2
+            assert os.stat('source2').st_nlink == 2
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test')
+            assert os.stat('input/dir1/hardlink').st_nlink == 4
+
+    def test_extract_hardlinks(self):
+        self._extract_hardlinks_setup()
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test', 'input/dir1')
+            assert os.stat('input/dir1/hardlink').st_nlink == 2
+            assert os.stat('input/dir1/subdir/hardlink').st_nlink == 2
+            assert os.stat('input/dir1/aaaa').st_nlink == 2
+            assert os.stat('input/dir1/source2').st_nlink == 2
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test')
+            assert os.stat('input/dir1/hardlink').st_nlink == 4
+
     def test_extract_include_exclude(self):
         self.cmd('init', self.repository_location)
         self.create_regular_file('file1', size=1024 * 80)