Merge pull request #7309 from ThomasWaldmann/no-part-files-in-final-archive

remove part files from final archive
This commit is contained in:
TW 2023-02-03 01:33:47 +01:00 committed by GitHub
commit 9b7647c89d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
23 changed files with 118 additions and 211 deletions

View File

@ -132,10 +132,10 @@ During a backup, a special checkpoint archive named ``<archive-name>.checkpoint`
is saved at every checkpoint interval (the default value for this is 30
minutes) containing all the data backed-up until that point.
This checkpoint archive is a valid archive,
but it is only a partial backup (not all files that you wanted to back up are
contained in it). Having it in the repo until a successful, full backup is
completed is useful because it references all the transmitted chunks up
This checkpoint archive is a valid archive, but it is only a partial backup
(not all files that you wanted to back up are contained in it and the last file
in it might be a partial file). Having it in the repo until a successful, full
backup is completed is useful because it references all the transmitted chunks up
to the checkpoint. This means that in case of an interruption, you only need to
retransfer the data since the last checkpoint.
@ -154,14 +154,12 @@ Once your backup has finished successfully, you can delete all
``<archive-name>.checkpoint`` archives. If you run ``borg prune``, it will
also care for deleting unneeded checkpoints.
Note: the checkpointing mechanism creates hidden, partial files in an archive,
so that checkpoints even work while a big file is being processed.
They are named ``<filename>.borg_part_<N>`` and all operations usually ignore
these files, but you can make them considered by giving the option
``--consider-part-files``. You usually only need that option if you are
really desperate (e.g. if you have no completed backup of that file and you'd
rather get a partial file extracted than nothing). You do **not** want to give
that option under any normal circumstances.
Note: the checkpointing mechanism may create a partial (truncated) last file
in a checkpoint archive named ``<filename>.borg_part``. Such partial files
won't be contained in the final archive.
This is done so that checkpoints work cleanly and promptly while a big
file is being processed.
How can I back up huge file(s) over an unstable connection?
------------------------------------------------------------
@ -171,10 +169,8 @@ Yes. For more details, see :ref:`checkpoints_parts`.
How can I restore huge file(s) over an unstable connection?
-----------------------------------------------------------
If you cannot manage to extract the whole big file in one go, you can extract
all the part files and manually concatenate them together.
For more details, see :ref:`checkpoints_parts`.
Try using ``borg mount`` and ``rsync`` (or a similar tool that supports
resuming a partial file copy from what's already copied).
How can I switch append-only mode on and off?
-----------------------------------------------------------------------------------------------------------------------------------

View File

@ -58,7 +58,6 @@ class Statistics:
self.output_json = output_json
self.iec = iec
self.osize = self.usize = self.nfiles = 0
self.osize_parts = self.usize_parts = self.nfiles_parts = 0
self.last_progress = 0 # timestamp when last progress was shown
self.files_stats = defaultdict(int)
self.chunking_time = 0.0
@ -66,15 +65,10 @@ class Statistics:
self.rx_bytes = 0
self.tx_bytes = 0
def update(self, size, unique, part=False):
if not part:
self.osize += size
if unique:
self.usize += size
else:
self.osize_parts += size
if unique:
self.usize_parts += size
def update(self, size, unique):
self.osize += size
if unique:
self.usize += size
def __add__(self, other):
if not isinstance(other, Statistics):
@ -83,9 +77,6 @@ class Statistics:
stats.osize = self.osize + other.osize
stats.usize = self.usize + other.usize
stats.nfiles = self.nfiles + other.nfiles
stats.osize_parts = self.osize_parts + other.osize_parts
stats.usize_parts = self.usize_parts + other.usize_parts
stats.nfiles_parts = self.nfiles_parts + other.nfiles_parts
stats.chunking_time = self.chunking_time + other.chunking_time
stats.hashing_time = self.hashing_time + other.hashing_time
for key in other.files_stats:
@ -134,20 +125,13 @@ Bytes sent to remote: {stats.tx_bytes}
}
def as_raw_dict(self):
return {
"size": self.osize,
"nfiles": self.nfiles,
"size_parts": self.osize_parts,
"nfiles_parts": self.nfiles_parts,
}
return {"size": self.osize, "nfiles": self.nfiles}
@classmethod
def from_raw_dict(cls, **kw):
self = cls()
self.osize = kw["size"]
self.nfiles = kw["nfiles"]
self.osize_parts = kw["size_parts"]
self.nfiles_parts = kw["nfiles_parts"]
return self
@property
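For orientation, a minimal usage sketch of the simplified accounting (the import path and the default constructor arguments are assumptions; the method behavior follows the code shown above):

# Sketch only; assumes Statistics is importable from borg.archive and that the
# constructor defaults suffice.
from borg.archive import Statistics

stats = Statistics()
stats.update(4096, unique=True)    # new, unique chunk: counts toward osize and usize
stats.update(4096, unique=False)   # duplicate chunk: counts toward osize only
stats.nfiles += 1
raw = stats.as_raw_dict()          # {"size": 8192, "nfiles": 1} -- no *_parts keys any more
restored = Statistics.from_raw_dict(**raw)
assert restored.osize == 8192 and restored.nfiles == 1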
@ -353,6 +337,7 @@ class ChunkBuffer:
self.chunks = []
self.key = key
self.chunker = get_chunker(*chunker_params, seed=self.key.chunk_seed, sparse=False)
self.saved_chunks_len = None
def add(self, item):
self.buffer.write(self.packer.pack(item.as_dict()))
@ -392,6 +377,18 @@ class ChunkBuffer:
def is_full(self):
return self.buffer.tell() > self.BUFFER_SIZE
def save_chunks_state(self):
# as we only append to self.chunks, remembering the current length is good enough
self.saved_chunks_len = len(self.chunks)
def restore_chunks_state(self):
scl = self.saved_chunks_len
assert scl is not None, "forgot to call save_chunks_state?"
tail_chunks = self.chunks[scl:]
del self.chunks[scl:]
self.saved_chunks_len = None
return tail_chunks
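The save/restore pair above works because self.chunks is append-only. A self-contained sketch of that pattern, using a hypothetical stand-in class rather than the real ChunkBuffer:

class AppendOnlyChunks:
    def __init__(self):
        self.chunks = []
        self.saved_len = None

    def save_state(self):
        # the list is append-only, so its current length fully describes the state
        self.saved_len = len(self.chunks)

    def restore_state(self):
        assert self.saved_len is not None, "forgot to call save_state?"
        tail = self.chunks[self.saved_len:]   # everything appended since save_state()
        del self.chunks[self.saved_len:]
        self.saved_len = None
        return tail

buf = AppendOnlyChunks()
buf.chunks += ["id-1", "id-2"]          # metadata stream chunks already flushed
buf.save_state()
buf.chunks.append("id-of-part-item")    # chunk holding only the part item
assert buf.restore_state() == ["id-of-part-item"]
assert buf.chunks == ["id-1", "id-2"]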
class CacheChunkBuffer(ChunkBuffer):
def __init__(self, cache, key, stats, chunker_params=ITEMS_CHUNKER_PARAMS):
@ -484,7 +481,6 @@ class Archive:
start=None,
start_monotonic=None,
end=None,
consider_part_files=False,
log_json=False,
iec=False,
):
@ -519,7 +515,6 @@ class Archive:
if end is None:
end = archive_ts_now()
self.end = end
self.consider_part_files = consider_part_files
self.pipeline = DownloadPipeline(self.repository, self.repo_objs)
self.create = create
if self.create:
@ -629,9 +624,6 @@ Duration: {0.duration}
return "Archive(%r)" % self.name
def item_filter(self, item, filter=None):
if not self.consider_part_files and "part" in item:
# this is a part(ial) file, we usually don't want to consider it.
return False
return filter(item) if filter else True
def iter_items(self, filter=None, preload=False):
@ -649,6 +641,15 @@ Duration: {0.duration}
stats.show_progress(item=item, dt=0.2)
self.items_buffer.add(item)
def prepare_checkpoint(self):
# we need to flush the archive metadata stream to repo chunks, so that
# we have the metadata stream chunks WITHOUT the part file item we add later.
# The part file item will then get into its own metadata stream chunk, which we
# can easily NOT include in the next checkpoint or the final archive.
self.items_buffer.flush(flush=True)
# remember the current state of self.chunks, which corresponds to the flushed chunks
self.items_buffer.save_chunks_state()
def write_checkpoint(self):
metadata = self.save(self.checkpoint_name)
# that .save() has committed the repo.
@ -660,6 +661,11 @@ Duration: {0.duration}
self.cache.chunk_decref(self.id, self.stats)
for id in metadata.item_ptrs:
self.cache.chunk_decref(id, self.stats)
# also get rid of that part item; we do not want it in the next checkpoint or the final archive
tail_chunks = self.items_buffer.restore_chunks_state()
# tail_chunks holds the tail of the archive items metadata stream, which is not needed for the next commit.
for id in tail_chunks:
self.cache.chunk_decref(id, self.stats)
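Taken together with prepare_checkpoint above, the intended sequence looks roughly like the following; the driver function is a hypothetical sketch (archive stands for an Archive instance as in this file), not code from this PR:

def checkpoint_with_part_item(archive, part_item):
    # 1. flush the metadata stream and remember its chunk count, so the part
    #    item lands in its own, later metadata chunk(s)
    archive.prepare_checkpoint()
    # 2. add the temporary item describing the partially processed file
    archive.add_item(part_item, show_progress=False)
    # 3. save the checkpoint archive; write_checkpoint then drops the refs to
    #    the checkpoint metadata and to the tail chunks carrying the part item,
    #    so neither the next checkpoint nor the final archive contains it
    archive.write_checkpoint()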
def save(self, name=None, comment=None, timestamp=None, stats=None, additional_metadata=None):
name = name or self.name
@ -694,14 +700,7 @@ Duration: {0.duration}
# because borg info relies on them. so, either use the given stats (from args)
# or fall back to self.stats if it was not given.
stats = stats or self.stats
metadata.update(
{
"size": stats.osize,
"nfiles": stats.nfiles,
"size_parts": stats.osize_parts,
"nfiles_parts": stats.nfiles_parts,
}
)
metadata.update({"size": stats.osize, "nfiles": stats.nfiles})
metadata.update(additional_metadata or {})
metadata = ArchiveItem(metadata)
data = self.key.pack_and_authenticate_metadata(metadata.as_dict(), context=b"archive")
@ -751,12 +750,9 @@ Duration: {0.duration}
pi.finish()
stats = Statistics(iec=self.iec)
stats.usize = unique_size # the part files use same chunks as the full file
stats.usize = unique_size
stats.nfiles = self.metadata.nfiles
stats.osize = self.metadata.size
if self.consider_part_files:
stats.nfiles += self.metadata.nfiles_parts
stats.osize += self.metadata.size_parts
return stats
@contextmanager
@ -1038,9 +1034,9 @@ Duration: {0.duration}
error = True
return exception_ignored # must not return None here
def chunk_decref(id, stats, part=False):
def chunk_decref(id, stats):
try:
self.cache.chunk_decref(id, stats, wait=False, part=part)
self.cache.chunk_decref(id, stats, wait=False)
except KeyError:
cid = bin_to_hex(id)
raise ChunksIndexError(cid)
@ -1064,9 +1060,8 @@ Duration: {0.duration}
for item in unpacker:
item = Item(internal_dict=item)
if "chunks" in item:
part = not self.consider_part_files and "part" in item
for chunk_id, size in item.chunks:
chunk_decref(chunk_id, stats, part=part)
chunk_decref(chunk_id, stats)
except (TypeError, ValueError):
# if items metadata spans multiple chunks and one chunk got dropped somehow,
# it could be that unpacker yields bad types
@ -1234,10 +1229,22 @@ def cached_hash(chunk, id_hash):
class ChunksProcessor:
# Processes an iterator of chunks for an Item
def __init__(self, *, key, cache, add_item, write_checkpoint, checkpoint_interval, checkpoint_volume, rechunkify):
def __init__(
self,
*,
key,
cache,
add_item,
prepare_checkpoint,
write_checkpoint,
checkpoint_interval,
checkpoint_volume,
rechunkify,
):
self.key = key
self.cache = cache
self.add_item = add_item
self.prepare_checkpoint = prepare_checkpoint
self.write_checkpoint = write_checkpoint
self.rechunkify = rechunkify
# time interval based checkpointing
@ -1248,38 +1255,34 @@ class ChunksProcessor:
self.current_volume = 0
self.last_volume_checkpoint = 0
def write_part_file(self, item, from_chunk, number):
def write_part_file(self, item):
self.prepare_checkpoint()
item = Item(internal_dict=item.as_dict())
length = len(item.chunks)
# the item should only have the *additional* chunks we processed after the last partial item:
item.chunks = item.chunks[from_chunk:]
# for borg recreate, we already have a size member in the source item (giving the total file size),
# but we consider only a part of the file here, thus we must recompute the size from the chunks:
item.get_size(memorize=True, from_chunks=True)
item.path += ".borg_part_%d" % number
item.part = number
number += 1
item.path += ".borg_part"
self.add_item(item, show_progress=False)
self.write_checkpoint()
return length, number
def maybe_checkpoint(self, item, from_chunk, part_number, forced=False):
def maybe_checkpoint(self, item):
checkpoint_done = False
sig_int_triggered = sig_int and sig_int.action_triggered()
if (
forced
or sig_int_triggered
sig_int_triggered
or (self.checkpoint_interval and time.monotonic() - self.last_checkpoint > self.checkpoint_interval)
or (self.checkpoint_volume and self.current_volume - self.last_volume_checkpoint >= self.checkpoint_volume)
):
if sig_int_triggered:
logger.info("checkpoint requested: starting checkpoint creation...")
from_chunk, part_number = self.write_part_file(item, from_chunk, part_number)
self.write_part_file(item)
checkpoint_done = True
self.last_checkpoint = time.monotonic()
self.last_volume_checkpoint = self.current_volume
if sig_int_triggered:
sig_int.action_completed()
logger.info("checkpoint requested: finished checkpoint creation!")
return from_chunk, part_number
return checkpoint_done # whether a checkpoint archive was created
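A standalone sketch of the trigger logic above (hypothetical function and parameter names): a checkpoint is due when a signal requested one, when the configured time interval has elapsed, or when enough new data volume has been processed:

def checkpoint_due(now, last_checkpoint, checkpoint_interval,
                   current_volume, last_volume_checkpoint, checkpoint_volume,
                   sig_int_triggered=False):
    return bool(
        sig_int_triggered
        or (checkpoint_interval and now - last_checkpoint > checkpoint_interval)
        or (checkpoint_volume and current_volume - last_volume_checkpoint >= checkpoint_volume)
    )

# e.g. 1800 s interval, volume-based checkpointing disabled (0):
assert checkpoint_due(now=2000.0, last_checkpoint=0.0, checkpoint_interval=1800,
                      current_volume=0, last_volume_checkpoint=0, checkpoint_volume=0)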
def process_file_chunks(self, item, cache, stats, show_progress, chunk_iter, chunk_processor=None):
if not chunk_processor:
@ -1297,28 +1300,13 @@ class ChunksProcessor:
# to get rid of .chunks_healthy, as it might not correspond to .chunks any more.
if self.rechunkify and "chunks_healthy" in item:
del item.chunks_healthy
from_chunk = 0
part_number = 1
for chunk in chunk_iter:
cle = chunk_processor(chunk)
item.chunks.append(cle)
self.current_volume += cle[1]
if show_progress:
stats.show_progress(item=item, dt=0.2)
from_chunk, part_number = self.maybe_checkpoint(item, from_chunk, part_number, forced=False)
else:
if part_number > 1:
if item.chunks[from_chunk:]:
# if we already have created a part item inside this file, we want to put the final
# chunks (if any) into a part item also (so all parts can be concatenated to get
# the complete file):
from_chunk, part_number = self.maybe_checkpoint(item, from_chunk, part_number, forced=True)
# if we created part files, we have referenced all chunks from the part files,
# but we also will reference the same chunks also from the final, complete file:
for chunk in item.chunks:
cache.chunk_incref(chunk.id, stats, size=chunk.size, part=True)
stats.nfiles_parts += part_number - 1
self.maybe_checkpoint(item)
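A toy sketch of the simplified per-file loop (hypothetical helpers, not the borg API): each chunk is appended to the item and counted toward the checkpoint volume, with no from_chunk/part_number bookkeeping and no extra chunk increfs for part files:

def process_chunks(item, chunk_iter, store_chunk, maybe_checkpoint):
    volume = 0
    for data in chunk_iter:
        chunk_id, size = store_chunk(data)   # hypothetical: store the chunk, return (id, size)
        item["chunks"].append((chunk_id, size))
        volume += size
        maybe_checkpoint(item)               # may write a checkpoint archive
    return volume

item = {"path": "bigfile", "chunks": []}
total = process_chunks(item, [b"aa", b"bbb"],
                       store_chunk=lambda d: (hash(d), len(d)),
                       maybe_checkpoint=lambda it: None)
assert total == 5 and len(item["chunks"]) == 2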
class FilesystemObjectProcessors:
@ -2474,6 +2462,7 @@ class ArchiveRecreater:
cache=self.cache,
key=self.key,
add_item=target.add_item,
prepare_checkpoint=target.prepare_checkpoint,
write_checkpoint=target.write_checkpoint,
checkpoint_interval=self.checkpoint_interval,
checkpoint_volume=self.checkpoint_volume,

View File

@ -149,7 +149,6 @@ def with_repository(
progress=getattr(args, "progress", False),
lock_wait=self.lock_wait,
cache_mode=getattr(args, "files_cache_mode", FILES_CACHE_MODE_DISABLED),
consider_part_files=getattr(args, "consider_part_files", False),
iec=getattr(args, "iec", False),
) as cache_:
return method(self, args, repository=repository, cache=cache_, **kwargs)
@ -214,7 +213,6 @@ def with_other_repository(manifest=False, cache=False, compatibility=None):
progress=False,
lock_wait=self.lock_wait,
cache_mode=getattr(args, "files_cache_mode", FILES_CACHE_MODE_DISABLED),
consider_part_files=getattr(args, "consider_part_files", False),
iec=getattr(args, "iec", False),
) as cache_:
kwargs["other_cache"] = cache_
@ -240,7 +238,6 @@ def with_archive(method):
noacls=getattr(args, "noacls", False),
noxattrs=getattr(args, "noxattrs", False),
cache=kwargs.get("cache"),
consider_part_files=args.consider_part_files,
log_json=args.log_json,
iec=args.iec,
)
@ -542,12 +539,6 @@ def define_common_options(add_common_option):
type=int,
help="set network upload buffer size in MiB. (default: 0=no buffer)",
)
add_common_option(
"--consider-part-files",
dest="consider_part_files",
action="store_true",
help="treat part files like normal files (e.g. to list/extract them)",
)
add_common_option(
"--debug-profile",
metavar="FILE",

View File

@ -255,6 +255,7 @@ class CreateMixIn:
cache=cache,
key=key,
add_item=archive.add_item,
prepare_checkpoint=archive.prepare_checkpoint,
write_checkpoint=archive.write_checkpoint,
checkpoint_interval=args.checkpoint_interval,
checkpoint_volume=args.checkpoint_volume,

View File

@ -33,7 +33,7 @@ class DebugMixIn:
def do_debug_dump_archive_items(self, args, repository, manifest):
"""dump (decrypted, decompressed) archive items metadata (not: data)"""
repo_objs = manifest.repo_objs
archive = Archive(manifest, args.name, consider_part_files=args.consider_part_files)
archive = Archive(manifest, args.name)
for i, item_id in enumerate(archive.metadata.items):
_, data = repo_objs.parse(item_id, repository.get(item_id))
filename = "%06d_%s.items" % (i, bin_to_hex(item_id))

View File

@ -79,9 +79,7 @@ class DeleteMixIn:
logger_list.info(msg_delete.format(format_archive(archive_info), i, len(archive_names)))
if not dry_run:
archive = Archive(
manifest, archive_name, cache=cache, consider_part_files=args.consider_part_files
)
archive = Archive(manifest, archive_name, cache=cache)
archive.delete(stats, progress=args.progress, forced=args.forced)
checkpointed = self.maybe_checkpoint(
checkpoint_func=checkpoint_func, checkpoint_interval=args.checkpoint_interval

View File

@ -27,7 +27,7 @@ class DiffMixIn:
print_output = print_json_output if args.json_lines else print_text_output
archive1 = archive
archive2 = Archive(manifest, args.other_name, consider_part_files=args.consider_part_files)
archive2 = Archive(manifest, args.other_name)
can_compare_chunk_ids = (
archive1.metadata.get("chunker_params", False) == archive2.metadata.get("chunker_params", True)

View File

@ -24,9 +24,7 @@ class InfoMixIn:
output_data = []
for i, archive_name in enumerate(archive_names, 1):
archive = Archive(
manifest, archive_name, cache=cache, consider_part_files=args.consider_part_files, iec=args.iec
)
archive = Archive(manifest, archive_name, cache=cache, iec=args.iec)
info = archive.info()
if args.json:
output_data.append(info)

View File

@ -27,7 +27,7 @@ class ListMixIn:
format = "{mode} {user:6} {group:6} {size:8} {mtime} {path}{extra}{NL}"
def _list_inner(cache):
archive = Archive(manifest, args.name, cache=cache, consider_part_files=args.consider_part_files)
archive = Archive(manifest, args.name, cache=cache)
formatter = ItemFormatter(archive, format, json_lines=args.json_lines)
for item in archive.iter_items(lambda item: matcher.match(item.path)):

View File

@ -142,7 +142,7 @@ class PruneMixIn:
else:
archives_deleted += 1
log_message = "Pruning archive (%d/%d):" % (archives_deleted, to_delete_len)
archive = Archive(manifest, archive.name, cache, consider_part_files=args.consider_part_files)
archive = Archive(manifest, archive.name, cache)
archive.delete(stats, forced=args.forced)
checkpointed = self.maybe_checkpoint(
checkpoint_func=checkpoint_func, checkpoint_interval=args.checkpoint_interval

View File

@ -271,6 +271,7 @@ class TarMixIn:
cache=cache,
key=key,
add_item=archive.add_item,
prepare_checkpoint=archive.prepare_checkpoint,
write_checkpoint=archive.write_checkpoint,
checkpoint_interval=args.checkpoint_interval,
checkpoint_volume=args.checkpoint_volume,

View File

@ -89,6 +89,14 @@ class TransferMixIn:
archive = Archive(manifest, name, cache=cache, create=True) if not dry_run else None
upgrader.new_archive(archive=archive)
for item in other_archive.iter_items():
is_part = bool(item.get("part", False))
if is_part:
# borg 1.x created part files while checkpointing (in addition to the full
# file in the final archive), named <filename>.borg_part_<part> with item.part >= 1.
# borg2 archives do not have such special part items anymore,
# so drop them when transferring from old archives as well; no code is left
# that treats them specially (e.g. to get the stats right).
continue
if "chunks" in item:
chunks = []
for chunk_id, size in item.chunks:
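The skip rule above reduces to: items carrying the legacy part attribute are not copied into the new archive. A hypothetical sketch using plain dicts instead of Item objects:

def keep_item(item_dict):
    # borg 1.x part files carry "part" >= 1; regular items have no such key
    return not bool(item_dict.get("part", False))

assert keep_item({"path": "bigfile"})
assert not keep_item({"path": "bigfile.borg_part_3", "part": 3})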

View File

@ -404,7 +404,6 @@ class Cache:
lock_wait=None,
permit_adhoc_cache=False,
cache_mode=FILES_CACHE_MODE_DISABLED,
consider_part_files=False,
iec=False,
):
def local():
@ -417,11 +416,10 @@ class Cache:
iec=iec,
lock_wait=lock_wait,
cache_mode=cache_mode,
consider_part_files=consider_part_files,
)
def adhoc():
return AdHocCache(manifest=manifest, lock_wait=lock_wait, iec=iec, consider_part_files=consider_part_files)
return AdHocCache(manifest=manifest, lock_wait=lock_wait, iec=iec)
if not permit_adhoc_cache:
return local()
@ -464,14 +462,11 @@ Total chunks: {0.total_chunks}
# XXX: this should really be moved down to `hashindex.pyx`
total_size, unique_size, total_unique_chunks, total_chunks = self.chunks.summarize()
# the above values have the problem that they do not consider part files,
# thus the total_size might be too high (chunks referenced
# by the part files AND by the complete file).
# since borg 1.2 we have new archive metadata telling the total size per archive,
# so we can just sum up all archives to get the "all archives" stats:
total_size = 0
for archive_name in self.manifest.archives:
archive = Archive(self.manifest, archive_name, consider_part_files=self.consider_part_files)
archive = Archive(self.manifest, archive_name)
stats = archive.calc_stats(self, want_unique=False)
total_size += stats.osize
stats = self.Summary(total_size, unique_size, total_unique_chunks, total_chunks)._asdict()
@ -498,7 +493,6 @@ class LocalCache(CacheStatsMixin):
progress=False,
lock_wait=None,
cache_mode=FILES_CACHE_MODE_DISABLED,
consider_part_files=False,
iec=False,
):
"""
@ -515,7 +509,6 @@ class LocalCache(CacheStatsMixin):
self.repo_objs = manifest.repo_objs
self.progress = progress
self.cache_mode = cache_mode
self.consider_part_files = consider_part_files
self.timestamp = None
self.txn_active = False
@ -971,23 +964,23 @@ class LocalCache(CacheStatsMixin):
)
return refcount
def chunk_incref(self, id, stats, size=None, part=False):
def chunk_incref(self, id, stats, size=None):
if not self.txn_active:
self.begin_txn()
count, _size = self.chunks.incref(id)
stats.update(_size, False, part=part)
stats.update(_size, False)
return ChunkListEntry(id, _size)
def chunk_decref(self, id, stats, wait=True, part=False):
def chunk_decref(self, id, stats, wait=True):
if not self.txn_active:
self.begin_txn()
count, size = self.chunks.decref(id)
if count == 0:
del self.chunks[id]
self.repository.delete(id, wait=wait)
stats.update(-size, True, part=part)
stats.update(-size, True)
else:
stats.update(-size, False, part=part)
stats.update(-size, False)
def file_known_and_unchanged(self, hashed_path, path_hash, st):
"""
@ -1084,14 +1077,13 @@ All archives: unknown unknown unknown
Unique chunks Total chunks
Chunk index: {0.total_unique_chunks:20d} unknown"""
def __init__(self, manifest, warn_if_unencrypted=True, lock_wait=None, consider_part_files=False, iec=False):
def __init__(self, manifest, warn_if_unencrypted=True, lock_wait=None, iec=False):
CacheStatsMixin.__init__(self, iec=iec)
assert isinstance(manifest, Manifest)
self.manifest = manifest
self.repository = manifest.repository
self.key = manifest.key
self.repo_objs = manifest.repo_objs
self.consider_part_files = consider_part_files
self._txn_active = False
self.security_manager = SecurityManager(self.repository)
@ -1145,7 +1137,7 @@ Chunk index: {0.total_unique_chunks:20d} unknown"""
self.chunks[id] = entry._replace(size=size)
return entry.refcount
def chunk_incref(self, id, stats, size=None, part=False):
def chunk_incref(self, id, stats, size=None):
if not self._txn_active:
self.begin_txn()
count, _size = self.chunks.incref(id)
@ -1153,19 +1145,19 @@ Chunk index: {0.total_unique_chunks:20d} unknown"""
# size or add_chunk); we can't add references to those (size=0 is invalid) and generally don't try to.
size = _size or size
assert size
stats.update(size, False, part=part)
stats.update(size, False)
return ChunkListEntry(id, size)
def chunk_decref(self, id, stats, wait=True, part=False):
def chunk_decref(self, id, stats, wait=True):
if not self._txn_active:
self.begin_txn()
count, size = self.chunks.decref(id)
if count == 0:
del self.chunks[id]
self.repository.delete(id, wait=wait)
stats.update(-size, True, part=part)
stats.update(-size, True)
else:
stats.update(-size, False, part=part)
stats.update(-size, False)
def commit(self):
if not self._txn_active:

View File

@ -38,8 +38,6 @@ cache_sync_init(HashIndex *chunks)
unpack_init(&ctx->ctx);
/* needs to be set only once */
ctx->ctx.user.chunks = chunks;
ctx->ctx.user.parts.size = 0;
ctx->ctx.user.parts.num_files = 0;
ctx->ctx.user.totals.size = 0;
ctx->ctx.user.totals.num_files = 0;
ctx->buf = NULL;
@ -71,24 +69,12 @@ cache_sync_num_files_totals(const CacheSyncCtx *ctx)
return ctx->ctx.user.totals.num_files;
}
static uint64_t
cache_sync_num_files_parts(const CacheSyncCtx *ctx)
{
return ctx->ctx.user.parts.num_files;
}
static uint64_t
cache_sync_size_totals(const CacheSyncCtx *ctx)
{
return ctx->ctx.user.totals.size;
}
static uint64_t
cache_sync_size_parts(const CacheSyncCtx *ctx)
{
return ctx->ctx.user.parts.size;
}
/**
* feed data to the cache synchronizer
* 0 = abort, 1 = continue

View File

@ -40,7 +40,7 @@
#endif
typedef struct unpack_user {
/* Item.chunks and Item.part are at the top level; we don't care about anything else,
/* Item.chunks is at the top level; we don't care about anything else,
* only need to track the current level to navigate arbitrary and unknown structure.
* To discern keys from everything else on the top level we use expect_map_item_end.
*/
@ -58,15 +58,12 @@ typedef struct unpack_user {
*/
int inside_chunks;
/* is this item a .part file (created for checkpointing inside files)? */
int part;
/* does this item have a chunks list in it? */
int has_chunks;
enum {
/* the next thing is a map key at the Item root level,
* and it might be the "chunks" or "part" key we're looking for */
* and it might be e.g. the "chunks" key we're looking for */
expect_map_key,
/* blocking state to expect_map_key
@ -114,11 +111,6 @@ typedef struct unpack_user {
uint64_t size, num_files;
} totals;
/* total sizes and files count coming from part files */
struct {
uint64_t size, num_files;
} parts;
} unpack_user;
struct unpack_context;
@ -317,7 +309,6 @@ static inline int unpack_callback_map(unpack_user* u, unsigned int n)
}
/* This begins a new Item */
u->expect = expect_map_key;
u->part = 0;
u->has_chunks = 0;
u->item.size = 0;
}
@ -358,10 +349,6 @@ static inline int unpack_callback_map_end(unpack_user* u)
if(u->level == 0) {
/* This ends processing of an Item */
if(u->has_chunks) {
if(u->part) {
u->parts.num_files += 1;
u->parts.size += u->item.size;
}
u->totals.num_files += 1;
u->totals.size += u->item.size;
}
@ -381,9 +368,6 @@ static inline int unpack_callback_raw(unpack_user* u, const char* b, const char*
u->expect = expect_chunks_begin;
u->inside_chunks = 1;
u->has_chunks = 1;
} else if(length == 4 && !memcmp("part", p, 4)) {
u->expect = expect_map_item_end;
u->part = 1;
} else {
u->expect = expect_map_item_end;
}

View File

@ -18,7 +18,9 @@ ARCHIVE_KEYS = frozenset(['version', 'name', 'hostname', 'username', 'time', 'ti
'command_line', 'recreate_command_line', # v2+ archives
'cmdline', 'recreate_cmdline', # legacy
'recreate_source_id', 'recreate_args', 'recreate_partial_chunks', # used in 1.1.0b1 .. b2
'size', 'nfiles', 'size_parts', 'nfiles_parts'])
'size', 'nfiles',
'size_parts', 'nfiles_parts', # legacy v1 archives
])
# fmt: on
# this is the set of keys that are always present in archives:

View File

@ -147,7 +147,7 @@ class ItemCache:
else:
raise ValueError("Invalid entry type in self.meta")
def iter_archive_items(self, archive_item_ids, filter=None, consider_part_files=False):
def iter_archive_items(self, archive_item_ids, filter=None):
unpacker = msgpack.Unpacker()
# Current offset in the metadata stream, which consists of all metadata chunks glued together
@ -193,7 +193,7 @@ class ItemCache:
break
item = Item(internal_dict=item)
if filter and not filter(item) or not consider_part_files and "part" in item:
if filter and not filter(item):
msgpacked_bytes = b""
continue
@ -330,15 +330,13 @@ class FuseBackend:
"""Build FUSE inode hierarchy from archive metadata"""
self.file_versions = {} # for versions mode: original path -> version
t0 = time.perf_counter()
archive = Archive(self._manifest, archive_name, consider_part_files=self._args.consider_part_files)
archive = Archive(self._manifest, archive_name)
strip_components = self._args.strip_components
matcher = build_matcher(self._args.patterns, self._args.paths)
hlm = HardLinkManager(id_type=bytes, info_type=str) # hlid -> path
filter = build_filter(matcher, strip_components)
for item_inode, item in self.cache.iter_archive_items(
archive.metadata.items, filter=filter, consider_part_files=self._args.consider_part_files
):
for item_inode, item in self.cache.iter_archive_items(archive.metadata.items, filter=filter):
if strip_components:
item.path = os.sep.join(item.path.split(os.sep)[strip_components:])
path = os.fsencode(item.path)

View File

@ -79,11 +79,7 @@ class FuseVersionsIndex(IndexBase):
def __setitem__(self, key: bytes, value: Any) -> None: ...
class CacheSynchronizer:
csize_parts: int
csize_totals: int
num_files_parts: int
num_files_totals: int
size_parts: int
size_totals: int
num_files_totals: int
def __init__(self, chunks_index: Any) -> None: ...
def feed(self, chunk: bytes) -> None: ...

View File

@ -41,9 +41,7 @@ cdef extern from "cache_sync/cache_sync.c":
CacheSyncCtx *cache_sync_init(HashIndex *chunks)
const char *cache_sync_error(const CacheSyncCtx *ctx)
uint64_t cache_sync_num_files_totals(const CacheSyncCtx *ctx)
uint64_t cache_sync_num_files_parts(const CacheSyncCtx *ctx)
uint64_t cache_sync_size_totals(const CacheSyncCtx *ctx)
uint64_t cache_sync_size_parts(const CacheSyncCtx *ctx)
int cache_sync_feed(CacheSyncCtx *ctx, void *data, uint32_t length)
void cache_sync_free(CacheSyncCtx *ctx)
@ -630,14 +628,6 @@ cdef class CacheSynchronizer:
def num_files_totals(self):
return cache_sync_num_files_totals(self.sync)
@property
def num_files_parts(self):
return cache_sync_num_files_parts(self.sync)
@property
def size_totals(self):
return cache_sync_size_totals(self.sync)
@property
def size_parts(self):
return cache_sync_size_parts(self.sync)

View File

@ -91,10 +91,6 @@ class ArchiveItem(PropDict):
@csize.setter
def csize(self, val: int) -> None: ...
@property
def csize_parts(self) -> int: ...
@csize_parts.setter
def csize_parts(self, val: int) -> None: ...
@property
def items(self) -> List: ...
@items.setter
def items(self, val: List) -> None: ...

View File

@ -297,7 +297,7 @@ cdef class Item(PropDict):
deleted = PropDictProperty(bool)
nlink = PropDictProperty(int)
part = PropDictProperty(int)
part = PropDictProperty(int) # legacy only
def get_size(self, *, memorize=False, from_chunks=False, consider_ids=None):
"""
@ -516,8 +516,8 @@ cdef class ArchiveItem(PropDict):
recreate_partial_chunks = PropDictProperty(list) # list of tuples
size = PropDictProperty(int)
nfiles = PropDictProperty(int)
size_parts = PropDictProperty(int)
nfiles_parts = PropDictProperty(int)
size_parts = PropDictProperty(int) # legacy only
nfiles_parts = PropDictProperty(int) # legacy only
def update_internal(self, d):
# legacy support for migration (data from old msgpacks comes in as bytes always, but sometimes we want str)

View File

@ -182,27 +182,9 @@ class ArchiverTestCase(ArchiverTestCaseBase):
)
# repo looking good overall? checks for rc == 0.
self.cmd(f"--repo={self.repository_location}", "check", "--debug")
# verify part files
out = self.cmd(
f"--repo={self.repository_location}",
"extract",
"test",
"stdin.borg_part_1",
"--consider-part-files",
"--stdout",
binary_output=True,
)
assert out == input_data[:chunk_size]
out = self.cmd(
f"--repo={self.repository_location}",
"extract",
"test",
"stdin.borg_part_2",
"--consider-part-files",
"--stdout",
binary_output=True,
)
assert out == input_data[: chunk_size - 1]
# verify that there are no part files in final archive
out = self.cmd(f"--repo={self.repository_location}", "list", "test")
assert "stdin.borg_part" not in out
# verify full file
out = self.cmd(f"--repo={self.repository_location}", "extract", "test", "stdin", "--stdout", binary_output=True)
assert out == input_data

View File

@ -74,7 +74,6 @@ class UpgraderFrom12To20:
"acl_access",
"acl_default",
"acl_extended",
"part",
}
if self.hlm.borg1_hardlink_master(item):