Merge pull request #5572 from ThomasWaldmann/export-tar-fix-5568-master

fix memory leak in export-tar with ssh: repo (fwd port to master)
commit 18f0729ed8 by TW, 2020-12-18 11:11:06 +01:00, committed by GitHub
2 changed files with 8 additions and 4 deletions
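
For context: with an ssh: repository, chunk reads go through RemoteRepository, which can preload chunk data ahead of the fetches that iter_items(preload=True) schedules. If a later fetch is not marked is_preloaded=True, the buffered responses are never consumed, so memory grows with the number of chunks exported. Below is a minimal, hypothetical sketch of that failure mode; PreloadingRepo and its methods are illustrative stand-ins, not borg's actual RemoteRepository API.

    # Hypothetical sketch of the leak pattern fixed here; PreloadingRepo and
    # its methods are stand-ins, not borg's actual RemoteRepository API.
    class PreloadingRepo:
        def __init__(self):
            self.preloaded = {}  # chunk id -> response buffered ahead of time

        def preload(self, ids):
            # request chunks ahead of time and buffer the responses
            for id_ in ids:
                self.preloaded[id_] = self._fetch_from_server(id_)

        def get(self, id_, is_preloaded=False):
            if is_preloaded:
                # consume the buffered response, freeing it
                return self.preloaded.pop(id_)
            # fetch again and leave the buffered copy behind: repeated over
            # every chunk of every item, this is the leak
            return self._fetch_from_server(id_)

        def _fetch_from_server(self, id_):
            return b'chunk-%d' % id_  # stand-in for a network round trip

    repo = PreloadingRepo()
    repo.preload([1, 2, 3])
    repo.get(1)                      # wrong: buffered copy of 1 never freed
    repo.get(2, is_preloaded=True)   # right: buffer entry for 2 is consumed
    assert 1 in repo.preloaded and 2 not in repo.preloaded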


@@ -523,6 +523,8 @@ Utilization of max. archive size: {csize_max:.0%}
         return filter(item) if filter else True
 
     def iter_items(self, filter=None, partial_extract=False, preload=False, hardlink_masters=None):
+        # note: when calling this with preload=True, later fetch_many() must be called with
+        # is_preloaded=True or the RemoteRepository code will leak memory!
         assert not (filter and partial_extract and preload) or hardlink_masters is not None
         for item in self.pipeline.unpack_many(self.metadata.items, partial_extract=partial_extract,
                                               preload=preload, hardlink_masters=hardlink_masters,
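
Taken with the export-tar hunks below, the contract documented by the new comment pairs up like so in caller code (a sketch assembled from the calls shown in this diff; archive, filter and hardlink_masters are assumed to be set up by the caller, as in export-tar):

    # Sketch of the documented contract, assembled from calls shown in this PR
    for item in archive.iter_items(filter, preload=True, hardlink_masters=hardlink_masters):
        # preload=True above obliges every fetch of this item's chunks to pass
        # is_preloaded=True, so the preloaded responses are consumed, not leaked
        chunk_iterator = archive.pipeline.fetch_many(
            [chunk_id for chunk_id, _, _ in item.chunks], is_preloaded=True)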


@@ -1013,8 +1013,8 @@ class Archiver:
         hardlink_masters = {} if partial_extract else None
 
         def peek_and_store_hardlink_masters(item, matched):
-            if (partial_extract and not matched and hardlinkable(item.mode) and
-                    item.get('hardlink_master', True) and 'source' not in item):
+            if ((partial_extract and not matched and hardlinkable(item.mode)) and
+                    (item.get('hardlink_master', True) and 'source' not in item)):
                 hardlink_masters[item.get('path')] = (item.get('chunks'), None)
 
         filter = self.build_filter(matcher, peek_and_store_hardlink_masters, strip_components)
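
Note on the hunk above: the rewrite only adds grouping parentheses, and since "and" is associative the old and new conditions are equivalent; this is a readability cleanup, not a behavior change.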
@@ -1031,7 +1031,8 @@
             """
             Return a file-like object that reads from the chunks of *item*.
             """
-            chunk_iterator = archive.pipeline.fetch_many([chunk_id for chunk_id, _, _ in item.chunks])
+            chunk_iterator = archive.pipeline.fetch_many([chunk_id for chunk_id, _, _ in item.chunks],
+                                                         is_preloaded=True)
             if pi:
                 info = [remove_surrogates(item.path)]
                 return ChunkIteratorFileWrapper(chunk_iterator,
@@ -1115,7 +1116,8 @@
                 return None, stream
             return tarinfo, stream
 
-        for item in archive.iter_items(filter, preload=True, hardlink_masters=hardlink_masters):
+        for item in archive.iter_items(filter, partial_extract=partial_extract,
+                                       preload=True, hardlink_masters=hardlink_masters):
             orig_path = item.path
             if strip_components:
                 item.path = os.sep.join(orig_path.split(os.sep)[strip_components:])
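
Together these hunks close both ends of issue #5568: fetch_many() now consumes the preloaded chunk data via is_preloaded=True, and iter_items() receives partial_extract, which keeps the call consistent with the assertion shown in the first hunk (hardlink_masters must be provided whenever filter, partial_extract and preload are combined) and presumably restricts preloading to the items export-tar will actually fetch.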