From f33f318d816505161d1449a02ddfdeb97d6fe80a Mon Sep 17 00:00:00 2001
From: Thomas Waldmann
Date: Thu, 2 May 2019 21:02:26 +0200
Subject: [PATCH] preload chunks for hardlink slaves w/o preloaded master, fixes #4350

also split the hardlink extraction test into 2 tests.

the nested chunk preloading helper is named _preload so that it does not
rebind (and thereby always enable) the preload parameter of unpack_many.
---
 src/borg/archive.py            | 42 +++++++++++++++++++++++++++++-----
 src/borg/archiver.py           |  5 ++--
 src/borg/testsuite/archiver.py | 20 +++++++++-------
 3 files changed, 51 insertions(+), 16 deletions(-)

diff --git a/src/borg/archive.py b/src/borg/archive.py
index 5b5ce0c4b..5f9cb463e 100644
--- a/src/borg/archive.py
+++ b/src/borg/archive.py
@@ -248,7 +248,7 @@ class DownloadPipeline:
         self.repository = repository
         self.key = key
 
-    def unpack_many(self, ids, filter=None, preload=False):
+    def unpack_many(self, ids, filter=None, partial_extract=False, preload=False, hardlink_masters=None):
         """
         Return iterator of items.
 
@@ -265,12 +265,40 @@ class DownloadPipeline:
             for item in items:
                 if 'chunks' in item:
                     item.chunks = [ChunkListEntry(*e) for e in item.chunks]
+
+            def _preload(chunks):  # not named "preload": that would rebind the parameter tested by "if preload:"
+                self.repository.preload([c.id for c in chunks])
+
             if filter:
                 items = [item for item in items if filter(item)]
+
             if preload:
-                for item in items:
-                    if 'chunks' in item:
-                        self.repository.preload([c.id for c in item.chunks])
+                if filter and partial_extract:
+                    # if we do only a partial extraction, it gets a bit
+                    # complicated with computing the preload items: if a hardlink master item is not
+                    # selected (== not extracted), we will still need to preload its chunks if a
+                    # corresponding hardlink slave is selected (== is extracted).
+                    # due to a side effect of the filter() call, we now have hardlink_masters dict populated.
+                    masters_preloaded = set()
+                    for item in items:
+                        if 'chunks' in item:  # regular file, maybe a hardlink master
+                            _preload(item.chunks)
+                            # if this is a hardlink master, remember that we already preloaded it:
+                            if 'source' not in item and hardlinkable(item.mode) and item.get('hardlink_master', True):
+                                masters_preloaded.add(item.path)
+                        elif 'source' in item and hardlinkable(item.mode):  # hardlink slave
+                            source = item.source
+                            if source not in masters_preloaded:
+                                # we only need to preload *once* (for the 1st selected slave)
+                                chunks, _ = hardlink_masters[source]
+                                _preload(chunks)
+                                masters_preloaded.add(source)
+                else:
+                    # easy: we do not have a filter, thus all items are selected, thus we need to preload all chunks.
+                    for item in items:
+                        if 'chunks' in item:
+                            _preload(item.chunks)
+
             for item in items:
                 yield item
 
@@ -486,8 +514,10 @@ Utilization of max. archive size: {csize_max:.0%}
             return False
         return filter(item) if filter else True
 
-    def iter_items(self, filter=None, preload=False):
-        for item in self.pipeline.unpack_many(self.metadata.items, preload=preload,
+    def iter_items(self, filter=None, partial_extract=False, preload=False, hardlink_masters=None):
+        assert not (filter and partial_extract and preload) or hardlink_masters is not None
+        for item in self.pipeline.unpack_many(self.metadata.items, partial_extract=partial_extract,
+                                              preload=preload, hardlink_masters=hardlink_masters,
                                               filter=lambda item: self.item_filter(item, filter)):
             yield item
 
diff --git a/src/borg/archiver.py b/src/borg/archiver.py
index 5a72fcc10..00de0beb4 100644
--- a/src/borg/archiver.py
+++ b/src/borg/archiver.py
@@ -769,7 +769,8 @@ class Archiver:
         else:
             pi = None
 
-        for item in archive.iter_items(filter, preload=True):
+        for item in archive.iter_items(filter, partial_extract=partial_extract,
+                                       preload=True, hardlink_masters=hardlink_masters):
             orig_path = item.path
             if strip_components:
                 item.path = os.sep.join(orig_path.split(os.sep)[strip_components:])
@@ -1011,7 +1012,7 @@ class Archiver:
                 return None, stream
             return tarinfo, stream
 
-        for item in archive.iter_items(filter, preload=True):
+        for item in archive.iter_items(filter, preload=True, hardlink_masters=hardlink_masters):
             orig_path = item.path
             if strip_components:
                 item.path = os.sep.join(orig_path.split(os.sep)[strip_components:])
diff --git a/src/borg/testsuite/archiver.py b/src/borg/testsuite/archiver.py
index e15541d6b..82bb2db97 100644
--- a/src/borg/testsuite/archiver.py
+++ b/src/borg/testsuite/archiver.py
@@ -824,7 +824,18 @@ class ArchiverTestCase(ArchiverTestCaseBase):
             assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456'
 
     @requires_hardlinks
-    def test_extract_hardlinks(self):
+    def test_extract_hardlinks1(self):
+        self._extract_hardlinks_setup()
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test')
+            assert os.stat('input/source').st_nlink == 4
+            assert os.stat('input/abba').st_nlink == 4
+            assert os.stat('input/dir1/hardlink').st_nlink == 4
+            assert os.stat('input/dir1/subdir/hardlink').st_nlink == 4
+            assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456'
+
+    @requires_hardlinks
+    def test_extract_hardlinks2(self):
         self._extract_hardlinks_setup()
         with changedir('output'):
             self.cmd('extract', self.repository_location + '::test', '--strip-components', '2')
@@ -840,13 +851,6 @@ class ArchiverTestCase(ArchiverTestCaseBase):
             assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456'
             assert os.stat('input/dir1/aaaa').st_nlink == 2
             assert os.stat('input/dir1/source2').st_nlink == 2
-        with changedir('output'):
-            self.cmd('extract', self.repository_location + '::test')
-            assert os.stat('input/source').st_nlink == 4
-            assert os.stat('input/abba').st_nlink == 4
-            assert os.stat('input/dir1/hardlink').st_nlink == 4
-            assert os.stat('input/dir1/subdir/hardlink').st_nlink == 4
-            assert open('input/dir1/subdir/hardlink', 'rb').read() == b'123456'
 
     def test_extract_include_exclude(self):
         self.cmd('init', '--encryption=repokey', self.repository_location)