Merge pull request #1526 from textshell/fix/preload-leak-hardlink

extract: When doing a partial restore don't leak prefetched chunks.
This commit is contained in:
enkore 2016-08-26 22:06:48 +02:00 committed by GitHub
commit 47404dfde5
4 changed files with 32 additions and 28 deletions

View File

@@ -161,11 +161,11 @@ class DownloadPipeline:
for _, data in self.fetch_many(ids):
unpacker.feed(data)
items = [Item(internal_dict=item) for item in unpacker]
if filter:
items = [item for item in items if filter(item)]
for item in items:
if 'chunks' in item:
item.chunks = [ChunkListEntry(*e) for e in item.chunks]
if filter:
items = [item for item in items if filter(item)]
if preload:
for item in items:
if 'chunks' in item:
@@ -422,7 +422,7 @@ Number of files: {0.stats.nfiles}'''.format(
return stats
def extract_item(self, item, restore_attrs=True, dry_run=False, stdout=False, sparse=False,
hardlink_masters=None, original_path=None, pi=None):
hardlink_masters=None, stripped_components=0, original_path=None, pi=None):
"""
Extract archive item.
@@ -432,9 +432,11 @@ Number of files: {0.stats.nfiles}'''.format(
:param stdout: write extracted data to stdout
:param sparse: write sparse files (chunk-granularity, independent of the original being sparse)
:param hardlink_masters: maps paths to (chunks, link_target) for extracting subtrees with hardlinks correctly
:param stripped_components: stripped leading path components to correct hard link extraction
:param original_path: 'path' key as stored in archive
:param pi: ProgressIndicatorPercent (or similar) for file extraction progress (in bytes)
"""
hardlink_masters = hardlink_masters or {}
has_damaged_chunks = 'chunks_healthy' in item
if dry_run or stdout:
if 'chunks' in item:
@@ -473,11 +475,11 @@ Number of files: {0.stats.nfiles}'''.format(
os.makedirs(os.path.dirname(path))
# Hard link?
if 'source' in item:
source = os.path.join(dest, item.source)
source = os.path.join(dest, *item.source.split(os.sep)[stripped_components:])
with backup_io():
if os.path.exists(path):
os.unlink(path)
if not hardlink_masters:
if item.source not in hardlink_masters:
os.link(source, path)
return
item.chunks, link_target = hardlink_masters[item.source]

View File

@@ -417,15 +417,17 @@ class Archiver:
self.print_file_status(status, path)
@staticmethod
def build_filter(matcher, is_hardlink_master, strip_components=0):
def build_filter(matcher, peek_and_store_hardlink_masters, strip_components):
if strip_components:
def item_filter(item):
return (is_hardlink_master(item) or
matcher.match(item.path) and os.sep.join(item.path.split(os.sep)[strip_components:]))
matched = matcher.match(item.path) and os.sep.join(item.path.split(os.sep)[strip_components:])
peek_and_store_hardlink_masters(item, matched)
return matched
else:
def item_filter(item):
return (is_hardlink_master(item) or
matcher.match(item.path))
matched = matcher.match(item.path)
peek_and_store_hardlink_masters(item, matched)
return matched
return item_filter
@with_repository()
@@ -450,25 +452,22 @@ class Archiver:
partial_extract = not matcher.empty() or strip_components
hardlink_masters = {} if partial_extract else None
def item_is_hardlink_master(item):
return (partial_extract and stat.S_ISREG(item.mode) and
item.get('hardlink_master', True) and 'source' not in item)
def peek_and_store_hardlink_masters(item, matched):
if (partial_extract and not matched and stat.S_ISREG(item.mode) and
item.get('hardlink_master', True) and 'source' not in item):
hardlink_masters[item.get('path')] = (item.get('chunks'), None)
filter = self.build_filter(matcher, item_is_hardlink_master, strip_components)
filter = self.build_filter(matcher, peek_and_store_hardlink_masters, strip_components)
if progress:
progress_logger = logging.getLogger(ProgressIndicatorPercent.LOGGER)
progress_logger.info('Calculating size')
extracted_size = sum(item.file_size() for item in archive.iter_items(filter))
extracted_size = sum(item.file_size(hardlink_masters) for item in archive.iter_items(filter))
pi = ProgressIndicatorPercent(total=extracted_size, msg='Extracting files %5.1f%%', step=0.1)
else:
pi = None
for item in archive.iter_items(filter, preload=True):
orig_path = item.path
if item_is_hardlink_master(item):
hardlink_masters[orig_path] = (item.get('chunks'), None)
if not matcher.match(item.path):
continue
if strip_components:
item.path = os.sep.join(orig_path.split(os.sep)[strip_components:])
if not args.dry_run:
@@ -489,7 +488,7 @@ class Archiver:
archive.extract_item(item, restore_attrs=False)
else:
archive.extract_item(item, stdout=stdout, sparse=sparse, hardlink_masters=hardlink_masters,
original_path=orig_path, pi=pi)
stripped_components=strip_components, original_path=orig_path, pi=pi)
except BackupOSError as e:
self.print_warning('%s: %s', remove_surrogates(orig_path), e)

View File

@@ -157,10 +157,13 @@ class Item(PropDict):
part = PropDict._make_property('part', int)
def file_size(self):
if 'chunks' not in self:
def file_size(self, hardlink_masters=None):
hardlink_masters = hardlink_masters or {}
chunks, _ = hardlink_masters.get(self.get('source'), (None, None))
chunks = self.get('chunks', chunks)
if chunks is None:
return 0
return sum(chunk.size for chunk in self.chunks)
return sum(chunk.size for chunk in chunks)
class EncryptedKey(PropDict):

View File

@@ -2204,25 +2204,25 @@ def test_compare_chunk_contents():
class TestBuildFilter:
@staticmethod
def item_is_hardlink_master(item):
return False
def peek_and_store_hardlink_masters(item, matched):
pass
def test_basic(self):
matcher = PatternMatcher()
matcher.add([parse_pattern('included')], True)
filter = Archiver.build_filter(matcher, self.item_is_hardlink_master)
filter = Archiver.build_filter(matcher, self.peek_and_store_hardlink_masters, 0)
assert filter(Item(path='included'))
assert filter(Item(path='included/file'))
assert not filter(Item(path='something else'))
def test_empty(self):
matcher = PatternMatcher(fallback=True)
filter = Archiver.build_filter(matcher, self.item_is_hardlink_master)
filter = Archiver.build_filter(matcher, self.peek_and_store_hardlink_masters, 0)
assert filter(Item(path='anything'))
def test_strip_components(self):
matcher = PatternMatcher(fallback=True)
filter = Archiver.build_filter(matcher, self.item_is_hardlink_master, strip_components=1)
filter = Archiver.build_filter(matcher, self.peek_and_store_hardlink_masters, strip_components=1)
assert not filter(Item(path='shallow'))
assert not filter(Item(path='shallow/')) # can this even happen? paths are normalized...
assert filter(Item(path='deep enough/file'))