mirror of
https://github.com/borgbackup/borg.git
synced 2025-03-14 16:11:43 +00:00
extract: When doing a partial restore don't leak prefetched chunks.
The filter function passed to iter_items (with preload=True) may never return True for items that are not really extracted later, because that would leak prefetched items. For restoring hard-linked files, the item containing the actual chunks might not be matched, or might be implicitly removed from the restore by strip_components. For this reason the chunk lists of all items that can potentially be used as hardlink targets need to be stored. To achieve both requirements at the same time, the filter function needs to store the needed information for the hardlinks while not returning True merely because an item could be a hardlink target. Known problems: When using progress indication, the calculated extracted_size can now be smaller than the actual extracted size in the presence of hard links (master is not restored), instead of bigger (potential master not used in restore).
This commit is contained in:
parent
e0b8635098
commit
972392e290
2 changed files with 12 additions and 15 deletions
|
@ -161,11 +161,11 @@ class DownloadPipeline:
|
|||
for _, data in self.fetch_many(ids):
|
||||
unpacker.feed(data)
|
||||
items = [Item(internal_dict=item) for item in unpacker]
|
||||
if filter:
|
||||
items = [item for item in items if filter(item)]
|
||||
for item in items:
|
||||
if 'chunks' in item:
|
||||
item.chunks = [ChunkListEntry(*e) for e in item.chunks]
|
||||
if filter:
|
||||
items = [item for item in items if filter(item)]
|
||||
if preload:
|
||||
for item in items:
|
||||
if 'chunks' in item:
|
||||
|
|
|
@ -417,15 +417,15 @@ class Archiver:
|
|||
self.print_file_status(status, path)
|
||||
|
||||
@staticmethod
|
||||
def build_filter(matcher, is_hardlink_master, strip_components=0):
|
||||
def build_filter(matcher, peek_and_store_hardlink_masters, strip_components=0):
|
||||
if strip_components:
|
||||
def item_filter(item):
|
||||
return (is_hardlink_master(item) or
|
||||
matcher.match(item.path) and os.sep.join(item.path.split(os.sep)[strip_components:]))
|
||||
peek_and_store_hardlink_masters(item)
|
||||
return matcher.match(item.path) and os.sep.join(item.path.split(os.sep)[strip_components:])
|
||||
else:
|
||||
def item_filter(item):
|
||||
return (is_hardlink_master(item) or
|
||||
matcher.match(item.path))
|
||||
peek_and_store_hardlink_masters(item)
|
||||
return matcher.match(item.path)
|
||||
return item_filter
|
||||
|
||||
@with_repository()
|
||||
|
@ -450,11 +450,12 @@ class Archiver:
|
|||
partial_extract = not matcher.empty() or strip_components
|
||||
hardlink_masters = {} if partial_extract else None
|
||||
|
||||
def item_is_hardlink_master(item):
|
||||
return (partial_extract and stat.S_ISREG(item.mode) and
|
||||
item.get('hardlink_master', True) and 'source' not in item)
|
||||
def peek_and_store_hardlink_masters(item):
|
||||
if (partial_extract and stat.S_ISREG(item.mode) and
|
||||
item.get('hardlink_master', True) and 'source' not in item):
|
||||
hardlink_masters[item.get('path')] = (item.get('chunks'), None)
|
||||
|
||||
filter = self.build_filter(matcher, item_is_hardlink_master, strip_components)
|
||||
filter = self.build_filter(matcher, peek_and_store_hardlink_masters, strip_components)
|
||||
if progress:
|
||||
progress_logger = logging.getLogger(ProgressIndicatorPercent.LOGGER)
|
||||
progress_logger.info('Calculating size')
|
||||
|
@ -465,10 +466,6 @@ class Archiver:
|
|||
|
||||
for item in archive.iter_items(filter, preload=True):
|
||||
orig_path = item.path
|
||||
if item_is_hardlink_master(item):
|
||||
hardlink_masters[orig_path] = (item.get('chunks'), None)
|
||||
if not matcher.match(item.path):
|
||||
continue
|
||||
if strip_components:
|
||||
item.path = os.sep.join(orig_path.split(os.sep)[strip_components:])
|
||||
if not args.dry_run:
|
||||
|
|
Loading…
Add table
Reference in a new issue