mirror of https://github.com/borgbackup/borg.git
Merge pull request #1526 from textshell/fix/preload-leak-hardlink
extract: When doing a partial restore don't leak prefetched chunks.
This commit is contained in:
commit
47404dfde5
|
@ -161,11 +161,11 @@ class DownloadPipeline:
|
|||
for _, data in self.fetch_many(ids):
|
||||
unpacker.feed(data)
|
||||
items = [Item(internal_dict=item) for item in unpacker]
|
||||
if filter:
|
||||
items = [item for item in items if filter(item)]
|
||||
for item in items:
|
||||
if 'chunks' in item:
|
||||
item.chunks = [ChunkListEntry(*e) for e in item.chunks]
|
||||
if filter:
|
||||
items = [item for item in items if filter(item)]
|
||||
if preload:
|
||||
for item in items:
|
||||
if 'chunks' in item:
|
||||
|
@ -422,7 +422,7 @@ Number of files: {0.stats.nfiles}'''.format(
|
|||
return stats
|
||||
|
||||
def extract_item(self, item, restore_attrs=True, dry_run=False, stdout=False, sparse=False,
|
||||
hardlink_masters=None, original_path=None, pi=None):
|
||||
hardlink_masters=None, stripped_components=0, original_path=None, pi=None):
|
||||
"""
|
||||
Extract archive item.
|
||||
|
||||
|
@ -432,9 +432,11 @@ Number of files: {0.stats.nfiles}'''.format(
|
|||
:param stdout: write extracted data to stdout
|
||||
:param sparse: write sparse files (chunk-granularity, independent of the original being sparse)
|
||||
:param hardlink_masters: maps paths to (chunks, link_target) for extracting subtrees with hardlinks correctly
|
||||
:param stripped_components: stripped leading path components to correct hard link extraction
|
||||
:param original_path: 'path' key as stored in archive
|
||||
:param pi: ProgressIndicatorPercent (or similar) for file extraction progress (in bytes)
|
||||
"""
|
||||
hardlink_masters = hardlink_masters or {}
|
||||
has_damaged_chunks = 'chunks_healthy' in item
|
||||
if dry_run or stdout:
|
||||
if 'chunks' in item:
|
||||
|
@ -473,11 +475,11 @@ Number of files: {0.stats.nfiles}'''.format(
|
|||
os.makedirs(os.path.dirname(path))
|
||||
# Hard link?
|
||||
if 'source' in item:
|
||||
source = os.path.join(dest, item.source)
|
||||
source = os.path.join(dest, *item.source.split(os.sep)[stripped_components:])
|
||||
with backup_io():
|
||||
if os.path.exists(path):
|
||||
os.unlink(path)
|
||||
if not hardlink_masters:
|
||||
if item.source not in hardlink_masters:
|
||||
os.link(source, path)
|
||||
return
|
||||
item.chunks, link_target = hardlink_masters[item.source]
|
||||
|
|
|
@ -417,15 +417,17 @@ class Archiver:
|
|||
self.print_file_status(status, path)
|
||||
|
||||
@staticmethod
def build_filter(matcher, peek_and_store_hardlink_masters, strip_components):
    """
    Build the predicate used to select archive items.

    :param matcher: object whose ``match(path)`` decides whether a path is selected
    :param peek_and_store_hardlink_masters: callback invoked as ``callback(item, matched)``
           for every item passed to the filter, whether it matched or not
    :param strip_components: number of leading path components to drop; when non-zero,
           a matching item is only accepted if something is left of its path afterwards
    :return: ``item_filter(item)``; its result is truthy for items to keep
    """
    if strip_components:
        def item_filter(item):
            # For a matching item the result is the remaining path, so a path with
            # too few components yields '' (falsy) and the item is dropped.
            matched = matcher.match(item.path) and os.sep.join(item.path.split(os.sep)[strip_components:])
            peek_and_store_hardlink_masters(item, matched)
            return matched
    else:
        def item_filter(item):
            matched = matcher.match(item.path)
            peek_and_store_hardlink_masters(item, matched)
            return matched
    return item_filter
|
||||
|
||||
@with_repository()
|
||||
|
@ -450,25 +452,22 @@ class Archiver:
|
|||
partial_extract = not matcher.empty() or strip_components
|
||||
hardlink_masters = {} if partial_extract else None
|
||||
|
||||
def item_is_hardlink_master(item):
|
||||
return (partial_extract and stat.S_ISREG(item.mode) and
|
||||
item.get('hardlink_master', True) and 'source' not in item)
|
||||
def peek_and_store_hardlink_masters(item, matched):
|
||||
if (partial_extract and not matched and stat.S_ISREG(item.mode) and
|
||||
item.get('hardlink_master', True) and 'source' not in item):
|
||||
hardlink_masters[item.get('path')] = (item.get('chunks'), None)
|
||||
|
||||
filter = self.build_filter(matcher, item_is_hardlink_master, strip_components)
|
||||
filter = self.build_filter(matcher, peek_and_store_hardlink_masters, strip_components)
|
||||
if progress:
|
||||
progress_logger = logging.getLogger(ProgressIndicatorPercent.LOGGER)
|
||||
progress_logger.info('Calculating size')
|
||||
extracted_size = sum(item.file_size() for item in archive.iter_items(filter))
|
||||
extracted_size = sum(item.file_size(hardlink_masters) for item in archive.iter_items(filter))
|
||||
pi = ProgressIndicatorPercent(total=extracted_size, msg='Extracting files %5.1f%%', step=0.1)
|
||||
else:
|
||||
pi = None
|
||||
|
||||
for item in archive.iter_items(filter, preload=True):
|
||||
orig_path = item.path
|
||||
if item_is_hardlink_master(item):
|
||||
hardlink_masters[orig_path] = (item.get('chunks'), None)
|
||||
if not matcher.match(item.path):
|
||||
continue
|
||||
if strip_components:
|
||||
item.path = os.sep.join(orig_path.split(os.sep)[strip_components:])
|
||||
if not args.dry_run:
|
||||
|
@ -489,7 +488,7 @@ class Archiver:
|
|||
archive.extract_item(item, restore_attrs=False)
|
||||
else:
|
||||
archive.extract_item(item, stdout=stdout, sparse=sparse, hardlink_masters=hardlink_masters,
|
||||
original_path=orig_path, pi=pi)
|
||||
stripped_components=strip_components, original_path=orig_path, pi=pi)
|
||||
except BackupOSError as e:
|
||||
self.print_warning('%s: %s', remove_surrogates(orig_path), e)
|
||||
|
||||
|
|
|
@ -157,10 +157,13 @@ class Item(PropDict):
|
|||
|
||||
part = PropDict._make_property('part', int)
|
||||
|
||||
def file_size(self, hardlink_masters=None):
    """
    Return the size of this item's content, summed over its chunk list.

    :param hardlink_masters: optional mapping of path to (chunks, link_target); it is
           looked up with this item's 'source' entry and supplies the chunk list when
           the item has no 'chunks' entry of its own
    :return: sum of ``chunk.size`` over the chunk list, 0 if no chunk list is available
    """
    hardlink_masters = hardlink_masters or {}
    # Fallback chunk list: the one recorded for the path this item's 'source' names.
    master_chunks, _ = hardlink_masters.get(self.get('source'), (None, None))
    # The item's own 'chunks' entry takes precedence over the fallback.
    chunks = self.get('chunks', master_chunks)
    if chunks is None:
        return 0
    return sum(chunk.size for chunk in chunks)
|
||||
|
||||
|
||||
class EncryptedKey(PropDict):
|
||||
|
|
|
@ -2204,25 +2204,25 @@ def test_compare_chunk_contents():
|
|||
|
||||
class TestBuildFilter:
    """Tests for the item filter returned by Archiver.build_filter."""

    @staticmethod
    def peek_and_store_hardlink_masters(item, matched):
        """No-op stand-in for the hard link master callback build_filter requires."""
        pass

    def test_basic(self):
        """Only paths selected by the matcher's include pattern pass the filter."""
        matcher = PatternMatcher()
        matcher.add([parse_pattern('included')], True)
        item_filter = Archiver.build_filter(matcher, self.peek_and_store_hardlink_masters, 0)
        assert item_filter(Item(path='included'))
        assert item_filter(Item(path='included/file'))
        assert not item_filter(Item(path='something else'))

    def test_empty(self):
        """A matcher without patterns but with fallback=True lets items through."""
        matcher = PatternMatcher(fallback=True)
        item_filter = Archiver.build_filter(matcher, self.peek_and_store_hardlink_masters, 0)
        assert item_filter(Item(path='anything'))

    def test_strip_components(self):
        """Items whose path is empty after stripping one component are rejected."""
        matcher = PatternMatcher(fallback=True)
        item_filter = Archiver.build_filter(matcher, self.peek_and_store_hardlink_masters, strip_components=1)
        assert not item_filter(Item(path='shallow'))
        assert not item_filter(Item(path='shallow/'))  # can this even happen? paths are normalized...
        assert item_filter(Item(path='deep enough/file'))
|
||||
|
|
Loading…
Reference in New Issue