mirror of https://github.com/borgbackup/borg.git
parent 220d44b2b8
commit 9211d0fa18
@@ -298,7 +298,19 @@ Number of files: {0.stats.nfiles}'''.format(
         cache.rollback()
         return stats
 
-    def extract_item(self, item, restore_attrs=True, dry_run=False, stdout=False, sparse=False):
+    def extract_item(self, item, restore_attrs=True, dry_run=False, stdout=False, sparse=False,
+                     hardlink_masters=None, original_path=None):
+        """
+        Extract archive item.
+
+        :param item: the item to extract
+        :param restore_attrs: restore file attributes
+        :param dry_run: do not write any data
+        :param stdout: write extracted data to stdout
+        :param sparse: write sparse files (chunk-granularity, independent of the original being sparse)
+        :param hardlink_masters: maps paths to (chunks, link_target) for extracting subtrees with hardlinks correctly
+        :param original_path: b'path' key as stored in archive
+        """
         if dry_run or stdout:
             if b'chunks' in item:
                 for data in self.pipeline.fetch_many([c[0] for c in item[b'chunks']], is_preloaded=True):
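
Note (not part of the diff): a minimal sketch of the mapping the new hardlink_masters parameter expects, following the docstring above; the paths and chunk entries here are hypothetical.

# Hypothetical illustration of the hardlink_masters mapping.
# Keys are archive paths of hardlink masters; values are (chunks, link_target):
#   chunks      -- the master's chunk list, or None once it has been extracted
#   link_target -- the filesystem path it was extracted to, or None if not extracted yet
hardlink_masters = {
    b'input/dir1/source2': ([(b'chunk-id', 4096, 4096)], None),   # not extracted yet: extract its chunks
    b'input/source': (None, 'output/input/source'),               # already extracted: just os.link() to it
}
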
@@ -308,6 +320,7 @@ Number of files: {0.stats.nfiles}'''.format(
                     sys.stdout.buffer.flush()
             return
 
+        original_path = original_path or item[b'path']
         dest = self.cwd
         if item[b'path'].startswith('/') or item[b'path'].startswith('..'):
             raise Exception('Path should be relative and local')
@@ -327,25 +340,36 @@ Number of files: {0.stats.nfiles}'''.format(
         if stat.S_ISREG(mode):
             if not os.path.exists(os.path.dirname(path)):
                 os.makedirs(os.path.dirname(path))
+
             # Hard link?
             if b'source' in item:
                 source = os.path.join(dest, item[b'source'])
                 if os.path.exists(path):
                     os.unlink(path)
-                os.link(source, path)
-            else:
-                with open(path, 'wb') as fd:
-                    ids = [c[0] for c in item[b'chunks']]
-                    for data in self.pipeline.fetch_many(ids, is_preloaded=True):
-                        if sparse and self.zeros.startswith(data):
-                            # all-zero chunk: create a hole in a sparse file
-                            fd.seek(len(data), 1)
-                        else:
-                            fd.write(data)
-                    pos = fd.tell()
-                    fd.truncate(pos)
-                    fd.flush()
-                    self.restore_attrs(path, item, fd=fd.fileno())
+                if not hardlink_masters:
+                    os.link(source, path)
+                    return
+                item[b'chunks'], link_target = hardlink_masters[item[b'source']]
+                if link_target:
+                    # Hard link was extracted previously, just link
+                    os.link(link_target, path)
+                    return
+                # Extract chunks, since the item which had the chunks was not extracted
+            with open(path, 'wb') as fd:
+                ids = [c[0] for c in item[b'chunks']]
+                for data in self.pipeline.fetch_many(ids, is_preloaded=True):
+                    if sparse and self.zeros.startswith(data):
+                        # all-zero chunk: create a hole in a sparse file
+                        fd.seek(len(data), 1)
+                    else:
+                        fd.write(data)
+                pos = fd.tell()
+                fd.truncate(pos)
+                fd.flush()
+                self.restore_attrs(path, item, fd=fd.fileno())
+            if hardlink_masters:
+                # Update master entry with extracted file path, so that following hardlinks don't extract twice.
+                hardlink_masters[item.get(b'source') or original_path] = (None, path)
         elif stat.S_ISDIR(mode):
             if not os.path.exists(path):
                 os.makedirs(path)
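
Note (not part of the diff): the extraction loop above produces sparse output by seeking over all-zero chunks and truncating to the final offset instead of writing the zeros. A standalone sketch of that technique in plain Python, assuming chunks is an iterable of bytes objects no longer than the zero buffer:

import os

def write_sparse(path, chunks, zeros=b'\0' * (1 << 20)):
    # Write chunks to path, leaving filesystem holes where a chunk is all zeros.
    with open(path, 'wb') as fd:
        for data in chunks:
            if zeros.startswith(data):           # all-zero chunk
                fd.seek(len(data), os.SEEK_CUR)  # skip forward instead of writing zeros
            else:
                fd.write(data)
        fd.truncate(fd.tell())                   # set the size in case the file ends in a hole
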
@@ -527,7 +551,10 @@ Number of files: {0.stats.nfiles}'''.format(
             source = self.hard_links.get((st.st_ino, st.st_dev))
             if (st.st_ino, st.st_dev) in self.hard_links:
                 item = self.stat_attrs(st, path)
-                item.update({b'path': safe_path, b'source': source})
+                item.update({
+                    b'path': safe_path,
+                    b'source': source,
+                })
                 self.add_item(item)
                 status = 'h'  # regular file, hardlink (to already seen inodes)
                 return status
@@ -549,7 +576,10 @@ Number of files: {0.stats.nfiles}'''.format(
             status = 'U'  # regular file, unchanged
         else:
             status = 'A'  # regular file, added
-        item = {b'path': safe_path}
+        item = {
+            b'path': safe_path,
+            b'hardlink_master': st.st_nlink > 1,  # item is a hard link and has the chunks
+        }
         # Only chunkify the file if needed
         if chunks is None:
             fh = Archive._open_rb(path)
@@ -587,7 +617,7 @@ Number of files: {0.stats.nfiles}'''.format(
 
 
 # this set must be kept complete, otherwise the RobustUnpacker might malfunction:
-ITEM_KEYS = set([b'path', b'source', b'rdev', b'chunks',
+ITEM_KEYS = set([b'path', b'source', b'rdev', b'chunks', b'hardlink_master',
                  b'mode', b'user', b'group', b'uid', b'gid', b'mtime', b'atime', b'ctime',
                  b'xattrs', b'bsdflags', b'acl_nfs4', b'acl_access', b'acl_default', b'acl_extended', ])
 
@@ -353,8 +353,20 @@ class Archiver:
         sparse = args.sparse
         strip_components = args.strip_components
         dirs = []
-        for item in archive.iter_items(lambda item: matcher.match(item[b'path']), preload=True):
+        partial_extract = not matcher.empty() or strip_components
+        hardlink_masters = {} if partial_extract else None
+
+        def item_is_hardlink_master(item):
+            return (partial_extract and stat.S_ISREG(item[b'mode']) and
+                    item.get(b'hardlink_master', True) and b'source' not in item)
+
+        for item in archive.iter_items(preload=True,
+                                       filter=lambda item: item_is_hardlink_master(item) or matcher.match(item[b'path'])):
             orig_path = item[b'path']
+            if item_is_hardlink_master(item):
+                hardlink_masters[orig_path] = (item.get(b'chunks'), item.get(b'source'))
+            if not matcher.match(item[b'path']):
+                continue
             if strip_components:
                 item[b'path'] = os.sep.join(orig_path.split(os.sep)[strip_components:])
                 if not item[b'path']:
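
Note (not part of the diff): a self-contained sketch of the selection logic above, with plain dicts standing in for archive items; the paths, modes and chunk ids are made up. Masters are always visited by the iteration filter so their chunks can be recorded, but only matching items are extracted:

import stat

def item_is_hardlink_master(item, partial_extract=True):
    return (partial_extract and stat.S_ISREG(item[b'mode']) and
            item.get(b'hardlink_master', True) and b'source' not in item)

items = [  # hypothetical archive: the master lives outside the requested subtree
    {b'path': b'input/source', b'mode': stat.S_IFREG | 0o644, b'hardlink_master': True, b'chunks': [b'c1']},
    {b'path': b'input/dir1/hardlink', b'mode': stat.S_IFREG | 0o644, b'source': b'input/source'},
]
matches = lambda path: path.startswith(b'input/dir1')  # stand-in for matcher.match

hardlink_masters = {}
for item in items:
    if item_is_hardlink_master(item):
        # record the master's chunks/source even though it is not extracted itself
        hardlink_masters[item[b'path']] = (item.get(b'chunks'), item.get(b'source'))
    if not matches(item[b'path']):
        continue
    print('would extract', item[b'path'], 'using masters', hardlink_masters)

Without forcing masters through the filter, input/dir1/hardlink would reference b'input/source' but find neither chunks to extract nor an already-extracted target to link to.
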
@@ -372,7 +384,8 @@ class Archiver:
                         dirs.append(item)
                         archive.extract_item(item, restore_attrs=False)
                     else:
-                        archive.extract_item(item, stdout=stdout, sparse=sparse)
+                        archive.extract_item(item, stdout=stdout, sparse=sparse, hardlink_masters=hardlink_masters,
+                                             original_path=orig_path)
             except OSError as e:
                 self.print_warning('%s: %s', remove_surrogates(orig_path), e)
 
@@ -286,6 +286,9 @@ class PatternMatcher:
         # Value to return from match function when none of the patterns match.
         self.fallback = fallback
 
+    def empty(self):
+        return not len(self._items)
+
     def add(self, patterns, value):
         """Add list of patterns to internal list. The given value is returned from the match function when one of the
         given patterns matches.
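
Note (not part of the diff): a hedged illustration of how the new empty() method is used by the extract code above, assuming PatternMatcher is constructed with the fallback value shown here and patterns are added via add(); some_pattern is a hypothetical parsed include pattern.

matcher = PatternMatcher(fallback=True)
assert matcher.empty()              # no patterns added: a full extract
matcher.add([some_pattern], True)   # some_pattern: hypothetical parsed pattern object
assert not matcher.empty()          # patterns present: a partial extract that needs hardlink_masters
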
@@ -467,6 +467,49 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         with self.assert_creates_file('input/dir/file'):
             self.cmd('extract', self.repository_location + '::test', '--strip-components', '0')
 
+    def _extract_hardlinks_setup(self):
+        os.mkdir(os.path.join(self.input_path, 'dir1'))
+        os.mkdir(os.path.join(self.input_path, 'dir1/subdir'))
+
+        self.create_regular_file('source')
+        os.link(os.path.join(self.input_path, 'source'),
+                os.path.join(self.input_path, 'abba'))
+        os.link(os.path.join(self.input_path, 'source'),
+                os.path.join(self.input_path, 'dir1/hardlink'))
+        os.link(os.path.join(self.input_path, 'source'),
+                os.path.join(self.input_path, 'dir1/subdir/hardlink'))
+
+        self.create_regular_file('dir1/source2')
+        os.link(os.path.join(self.input_path, 'dir1/source2'),
+                os.path.join(self.input_path, 'dir1/aaaa'))
+
+        self.cmd('init', self.repository_location)
+        self.cmd('create', self.repository_location + '::test', 'input')
+
+    def test_strip_components_links(self):
+        self._extract_hardlinks_setup()
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test', '--strip-components', '2')
+            assert os.stat('hardlink').st_nlink == 2
+            assert os.stat('subdir/hardlink').st_nlink == 2
+            assert os.stat('aaaa').st_nlink == 2
+            assert os.stat('source2').st_nlink == 2
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test')
+            assert os.stat('input/dir1/hardlink').st_nlink == 4
+
+    def test_extract_hardlinks(self):
+        self._extract_hardlinks_setup()
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test', 'input/dir1')
+            assert os.stat('input/dir1/hardlink').st_nlink == 2
+            assert os.stat('input/dir1/subdir/hardlink').st_nlink == 2
+            assert os.stat('input/dir1/aaaa').st_nlink == 2
+            assert os.stat('input/dir1/source2').st_nlink == 2
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test')
+            assert os.stat('input/dir1/hardlink').st_nlink == 4
+
     def test_extract_include_exclude(self):
         self.cmd('init', self.repository_location)
         self.create_regular_file('file1', size=1024 * 80)
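
Note (not part of the diff): the st_nlink assertions count directory entries per inode. Extracting only input/dir1 recreates two of the four names of the 'source' inode (dir1/hardlink and dir1/subdir/hardlink), hence st_nlink == 2; a full extract recreates all four names (source, abba, dir1/hardlink, dir1/subdir/hardlink), hence st_nlink == 4. A tiny standalone reminder of that semantics:

import os, tempfile

d = tempfile.mkdtemp()
src = os.path.join(d, 'source')
open(src, 'wb').close()
os.link(src, os.path.join(d, 'link'))  # a second name for the same inode
print(os.stat(src).st_nlink)           # -> 2
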