Add --format option to `borg diff`, resolve issue #4634 (#7534)

diff: add --format option

also: refactoring/improvements of BaseFormatter
This commit is contained in:
Tarrailt 2023-06-12 04:41:36 +08:00 committed by GitHub
parent 8506c05ab6
commit 616d5e7330
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 492 additions and 266 deletions

View File

@ -11,6 +11,7 @@ from functools import partial
from getpass import getuser
from io import BytesIO
from itertools import groupby, zip_longest
from typing import Iterator
from shutil import get_terminal_size
from .platformflags import is_win32
@ -297,31 +298,24 @@ class DownloadPipeline:
unpacker = msgpack.Unpacker(use_list=False)
for data in self.fetch_many(ids):
unpacker.feed(data)
items = [Item(internal_dict=item) for item in unpacker]
for item in items:
for _item in unpacker:
item = Item(internal_dict=_item)
if "chunks" in item:
item.chunks = [ChunkListEntry(*e) for e in item.chunks]
if filter:
items = [item for item in items if filter(item)]
if preload:
for item in items:
if "chunks" in item:
hlid = item.get("hlid", None)
if hlid is None:
preload_chunks = True
else:
if hlid in hlids_preloaded:
preload_chunks = False
else:
# not having the hardlink's chunks already preloaded for other hardlink to same inode
preload_chunks = True
hlids_preloaded.add(hlid)
if preload_chunks:
self.repository.preload([c.id for c in item.chunks])
for item in items:
if filter and not filter(item):
continue
if preload and "chunks" in item:
hlid = item.get("hlid", None)
if hlid is None:
preload_chunks = True
elif hlid in hlids_preloaded:
preload_chunks = False
else:
# not having the hardlink's chunks already preloaded for other hardlink to same inode
preload_chunks = True
hlids_preloaded.add(hlid)
if preload_chunks:
self.repository.preload([c.id for c in item.chunks])
yield item
def fetch_many(self, ids, is_preloaded=False):
@ -631,10 +625,9 @@ Duration: {0.duration}
def iter_items(self, filter=None, preload=False):
# note: when calling this with preload=True, later fetch_many() must be called with
# is_preloaded=True or the RemoteRepository code will leak memory!
for item in self.pipeline.unpack_many(
yield from self.pipeline.unpack_many(
self.metadata.items, preload=preload, filter=lambda item: self.item_filter(item, filter)
):
yield item
)
def add_item(self, item, show_progress=True, stats=None):
if show_progress and self.show_progress:
@ -1123,55 +1116,59 @@ Duration: {0.duration}
logger.warning("borg check --repair is required to free all space.")
@staticmethod
def compare_archives_iter(archive1, archive2, matcher=None, can_compare_chunk_ids=False, content_only=False):
def compare_archives_iter(
archive1: "Archive", archive2: "Archive", matcher=None, can_compare_chunk_ids=False
) -> Iterator[ItemDiff]:
"""
Yields tuples with a path and an ItemDiff instance describing changes/indicating equality.
Yields an ItemDiff instance describing changes/indicating equality.
:param matcher: PatternMatcher class to restrict results to only matching paths.
:param can_compare_chunk_ids: Whether --chunker-params are the same for both archives.
"""
def compare_items(item1, item2):
def compare_items(path: str, item1: Item, item2: Item):
return ItemDiff(
path,
item1,
item2,
archive1.pipeline.fetch_many([c.id for c in item1.get("chunks", [])]),
archive2.pipeline.fetch_many([c.id for c in item2.get("chunks", [])]),
can_compare_chunk_ids=can_compare_chunk_ids,
content_only=content_only,
)
orphans_archive1 = OrderedDict()
orphans_archive2 = OrderedDict()
orphans_archive1: OrderedDict[str, Item] = OrderedDict()
orphans_archive2: OrderedDict[str, Item] = OrderedDict()
assert matcher is not None, "matcher must be set"
for item1, item2 in zip_longest(
archive1.iter_items(lambda item: matcher.match(item.path)),
archive2.iter_items(lambda item: matcher.match(item.path)),
):
if item1 and item2 and item1.path == item2.path:
yield (item1.path, compare_items(item1, item2))
yield compare_items(item1.path, item1, item2)
continue
if item1:
matching_orphan = orphans_archive2.pop(item1.path, None)
if matching_orphan:
yield (item1.path, compare_items(item1, matching_orphan))
yield compare_items(item1.path, item1, matching_orphan)
else:
orphans_archive1[item1.path] = item1
if item2:
matching_orphan = orphans_archive1.pop(item2.path, None)
if matching_orphan:
yield (matching_orphan.path, compare_items(matching_orphan, item2))
yield compare_items(matching_orphan.path, matching_orphan, item2)
else:
orphans_archive2[item2.path] = item2
# At this point orphans_* contain items that had no matching partner in the other archive
for added in orphans_archive2.values():
path = added.path
deleted_item = Item.create_deleted(path)
yield (path, compare_items(deleted_item, added))
yield compare_items(path, deleted_item, added)
for deleted in orphans_archive1.values():
path = deleted.path
deleted_item = Item.create_deleted(path)
yield (path, compare_items(deleted, deleted_item))
yield compare_items(path, deleted, deleted_item)
class MetadataCollector:

View File

@ -1,13 +1,14 @@
import argparse
import textwrap
import json
import sys
import os
from ._common import with_repository, with_archive, build_matcher
from ._common import with_repository, with_archive, build_matcher, Highlander
from ..archive import Archive
from ..constants import * # NOQA
from ..helpers import archivename_validator
from ..helpers import BaseFormatter, DiffFormatter, archivename_validator, BorgJsonEncoder
from ..manifest import Manifest
from ..helpers.parseformat import BorgJsonEncoder
from ..logger import create_logger
logger = create_logger()
@ -18,14 +19,12 @@ class DiffMixIn:
@with_archive
def do_diff(self, args, repository, manifest, archive):
"""Diff contents of two archives"""
def print_json_output(diff, path):
print(json.dumps({"path": path, "changes": [j for j, str in diff]}, sort_keys=True, cls=BorgJsonEncoder))
def print_text_output(diff, path):
print("{:<19} {}".format(" ".join([str for j, str in diff]), path))
print_output = print_json_output if args.json_lines else print_text_output
if args.format is not None:
format = args.format
elif args.content_only:
format = "{content}{link}{directory}{blkdev}{chrdev}{fifo} {path}{NL}"
else:
format = os.environ.get("BORG_DIFF_FORMAT", "{change} {path}{NL}")
archive1 = archive
archive2 = Archive(manifest, args.other_name)
@ -43,17 +42,36 @@ class DiffMixIn:
matcher = build_matcher(args.patterns, args.paths)
diffs = Archive.compare_archives_iter(
archive1, archive2, matcher, can_compare_chunk_ids=can_compare_chunk_ids, content_only=args.content_only
diffs_iter = Archive.compare_archives_iter(
archive1, archive2, matcher, can_compare_chunk_ids=can_compare_chunk_ids
)
# Conversion to string and filtering for diff.equal to save memory if sorting
diffs = ((path, diff.changes()) for path, diff in diffs if not diff.equal)
diffs = (diff for diff in diffs_iter if not diff.equal(args.content_only))
if args.sort:
diffs = sorted(diffs)
diffs = sorted(diffs, key=lambda diff: diff.path)
for path, diff in diffs:
print_output(diff, path)
formatter = DiffFormatter(format, args.content_only)
for diff in diffs:
if args.json_lines:
print(
json.dumps(
{
"path": diff.path,
"changes": [
change.to_dict()
for name, change in diff.changes().items()
if not args.content_only or (name not in DiffFormatter.METADATA)
],
},
sort_keys=True,
cls=BorgJsonEncoder,
)
)
else:
res: str = formatter.format_item(diff)
if res.strip():
sys.stdout.write(res)
for pattern in matcher.get_unmatched_include_patterns():
self.print_warning("Include pattern '%s' never matched.", pattern)
@ -64,25 +82,48 @@ class DiffMixIn:
from ._common import process_epilog
from ._common import define_exclusion_group
diff_epilog = process_epilog(
"""
This command finds differences (file contents, user/group/mode) between archives.
diff_epilog = (
process_epilog(
"""
This command finds differences (file contents, metadata) between ARCHIVE1 and ARCHIVE2.
A repository location and an archive name must be specified for REPO::ARCHIVE1.
ARCHIVE2 is just another archive name in same repository (no repository location
allowed).
For more help on include/exclude patterns, see the :ref:`borg_patterns` command output.
For archives created with Borg 1.1 or newer diff automatically detects whether
the archives are created with the same chunker params. If so, only chunk IDs
are compared, which is very fast.
.. man NOTES
For archives prior to Borg 1.1 chunk contents are compared by default.
If you did not create the archives with different chunker params,
pass ``--same-chunker-params``.
Note that the chunker params changed from Borg 0.xx to 1.0.
The FORMAT specifier syntax
+++++++++++++++++++++++++++
For more help on include/exclude patterns, see the :ref:`borg_patterns` command output.
"""
The ``--format`` option uses python's `format string syntax
<https://docs.python.org/3.9/library/string.html#formatstrings>`_.
Examples:
::
$ borg diff --format '{content:30} {path}{NL}' ArchiveFoo ArchiveBar
modified: +4.1 kB -1.0 kB file-diff
...
# {VAR:<NUMBER} - pad to NUMBER columns left-aligned.
# {VAR:>NUMBER} - pad to NUMBER columns right-aligned.
$ borg diff --format '{content:>30} {path}{NL}' ArchiveFoo ArchiveBar
modified: +4.1 kB -1.0 kB file-diff
...
The following keys are always available:
"""
)
+ BaseFormatter.keys_help()
+ textwrap.dedent(
"""
Keys available only when showing differences between archives:
"""
)
+ DiffFormatter.keys_help()
)
subparser = subparsers.add_parser(
"diff",
@ -107,6 +148,13 @@ class DiffMixIn:
help="Override check of chunker parameters.",
)
subparser.add_argument("--sort", dest="sort", action="store_true", help="Sort the output lines by file path.")
subparser.add_argument(
"--format",
metavar="FORMAT",
dest="format",
action=Highlander,
help='specify format for differences between archives (default: "{change} {path}{NL}")',
)
subparser.add_argument("--json-lines", action="store_true", help="Format output as JSON Lines. ")
subparser.add_argument(
"--content-only",

View File

@ -29,10 +29,9 @@ class ListMixIn:
def _list_inner(cache):
archive = Archive(manifest, args.name, cache=cache)
formatter = ItemFormatter(archive, format, json_lines=args.json_lines)
formatter = ItemFormatter(archive, format)
for item in archive.iter_items(lambda item: matcher.match(item.path)):
sys.stdout.write(formatter.format_item(item))
sys.stdout.write(formatter.format_item(item, args.json_lines, sort=True))
# Only load the cache if it will be used
if ItemFormatter.format_needs_cache(format):

View File

@ -89,7 +89,7 @@ class PruneMixIn:
format = "{archive}"
else:
format = os.environ.get("BORG_PRUNE_FORMAT", "{archive:<36} {time} [{id}]")
formatter = ArchiveFormatter(format, repository, manifest, manifest.key, json=False, iec=args.iec)
formatter = ArchiveFormatter(format, repository, manifest, manifest.key, iec=args.iec)
checkpoint_re = r"\.checkpoint(\.\d+)?"
archives_checkpoints = manifest.archives.list(
@ -169,7 +169,7 @@ class PruneMixIn:
or (args.list_pruned and archive in to_delete)
or (args.list_kept and archive not in to_delete)
):
list_logger.info(f"{log_message:<40} {formatter.format_item(archive)}")
list_logger.info(f"{log_message:<40} {formatter.format_item(archive, jsonline=False)}")
pi.finish()
if sig_int:
# Ctrl-C / SIGINT: do not checkpoint (commit) again, we already have a checkpoint in this case.

View File

@ -23,15 +23,15 @@ class RListMixIn:
format = "{archive}{NL}"
else:
format = os.environ.get("BORG_RLIST_FORMAT", "{archive:<36} {time} [{id}]{NL}")
formatter = ArchiveFormatter(format, repository, manifest, manifest.key, json=args.json, iec=args.iec)
formatter = ArchiveFormatter(format, repository, manifest, manifest.key, iec=args.iec)
output_data = []
for archive_info in manifest.archives.list_considering(args):
if args.json:
output_data.append(formatter.get_item_data(archive_info))
output_data.append(formatter.get_item_data(archive_info, args.json))
else:
sys.stdout.write(formatter.format_item(archive_info))
sys.stdout.write(formatter.format_item(archive_info, args.json))
if args.json:
json_print(basic_json_data(manifest, extra={"archives": output_data}))

View File

@ -28,7 +28,7 @@ from .parseformat import sizeof_fmt, sizeof_fmt_iec, sizeof_fmt_decimal, Locatio
from .parseformat import format_line, replace_placeholders, PlaceholderError, relative_time_marker_validator
from .parseformat import format_archive, parse_stringified_list, clean_lines
from .parseformat import location_validator, archivename_validator, comment_validator
from .parseformat import BaseFormatter, ArchiveFormatter, ItemFormatter, file_status
from .parseformat import BaseFormatter, ArchiveFormatter, ItemFormatter, DiffFormatter, file_status
from .parseformat import swidth_slice, ellipsis_truncate
from .parseformat import BorgJsonEncoder, basic_json_data, json_print, json_dump, prepare_dump_dict
from .parseformat import Highlander, MakePathSafeAction

View File

@ -1,3 +1,4 @@
import abc
import argparse
import base64
import hashlib
@ -8,6 +9,7 @@ import re
import shlex
import stat
import uuid
from typing import List, Dict, Set, Tuple, ClassVar, Any, TYPE_CHECKING, Literal
from binascii import hexlify
from collections import Counter, OrderedDict
from datetime import datetime, timezone
@ -27,6 +29,9 @@ from .. import __version_tuple__ as borg_version_tuple
from ..constants import * # NOQA
from ..platformflags import is_win32
if TYPE_CHECKING:
from ..item import ItemDiff
def bin_to_hex(binary):
return hexlify(binary).decode("ascii")
@ -649,8 +654,10 @@ def archivename_validator(text):
return validate_text(text)
class BaseFormatter:
FIXED_KEYS = {
class BaseFormatter(metaclass=abc.ABCMeta):
format: str
static_data: Dict[str, Any]
FIXED_KEYS: ClassVar[Dict[str, str]] = {
# Formatting aids
"LF": "\n",
"SPACE": " ",
@ -660,25 +667,49 @@ class BaseFormatter:
"NEWLINE": "\n",
"NL": "\n", # \n is automatically converted to os.linesep on write
}
KEY_DESCRIPTIONS: ClassVar[Dict[str, str]] = {
"NEWLINE": "OS dependent line separator",
"NL": "alias of NEWLINE",
"NUL": "NUL character for creating print0 / xargs -0 like output",
"SPACE": "space character",
"TAB": "tab character",
"CR": "carriage return character",
"LF": "line feed character",
}
KEY_GROUPS: ClassVar[Tuple[Tuple[str, ...], ...]] = (("NEWLINE", "NL", "NUL", "SPACE", "TAB", "CR", "LF"),)
def get_item_data(self, item):
def __init__(self, format: str, static: Dict[str, Any]) -> None:
self.format = partial_format(format, static)
self.static_data = static
@abc.abstractmethod
def get_item_data(self, item, jsonline=False) -> dict:
raise NotImplementedError
def format_item(self, item):
return self.format.format_map(self.get_item_data(item))
@staticmethod
def keys_help():
def format_item(self, item, jsonline=False, sort=False):
data = self.get_item_data(item, jsonline)
return (
"- NEWLINE: OS dependent line separator\n"
"- NL: alias of NEWLINE\n"
"- NUL: NUL character for creating print0 / xargs -0 like output\n"
"- SPACE\n"
"- TAB\n"
"- CR\n"
"- LF"
f"{json.dumps(data, cls=BorgJsonEncoder, sort_keys=sort)}\n" if jsonline else self.format.format_map(data)
)
@classmethod
def keys_help(cls):
help = []
keys: Set[str] = set()
keys.update(cls.KEY_DESCRIPTIONS.keys())
keys.update(key for group in cls.KEY_GROUPS for key in group)
for group in cls.KEY_GROUPS:
for key in group:
keys.remove(key)
text = "- " + key
if key in cls.KEY_DESCRIPTIONS:
text += ": " + cls.KEY_DESCRIPTIONS[key]
help.append(text)
help.append("")
assert not keys, str(keys)
return "\n".join(help)
class ArchiveFormatter(BaseFormatter):
KEY_DESCRIPTIONS = {
@ -703,47 +734,17 @@ class ArchiveFormatter(BaseFormatter):
("size", "nfiles"),
)
@classmethod
def available_keys(cls):
from ..manifest import ArchiveInfo
fake_archive_info = ArchiveInfo("archivename", b"\1" * 32, datetime(1970, 1, 1, tzinfo=timezone.utc))
formatter = cls("", None, None, None)
keys = []
keys.extend(formatter.call_keys.keys())
keys.extend(formatter.get_item_data(fake_archive_info).keys())
return keys
@classmethod
def keys_help(cls):
help = []
keys = cls.available_keys()
for key in cls.FIXED_KEYS:
keys.remove(key)
for group in cls.KEY_GROUPS:
for key in group:
keys.remove(key)
text = "- " + key
if key in cls.KEY_DESCRIPTIONS:
text += ": " + cls.KEY_DESCRIPTIONS[key]
help.append(text)
help.append("")
assert not keys, str(keys)
return "\n".join(help)
def __init__(self, format, repository, manifest, key, *, json=False, iec=False):
def __init__(self, format, repository, manifest, key, *, iec=False):
static_data = {} # here could be stuff on repo level, above archive level
static_data.update(self.FIXED_KEYS)
super().__init__(format, static_data)
self.repository = repository
self.manifest = manifest
self.key = key
self.name = None
self.id = None
self._archive = None
self.json = json
self.iec = iec
static_keys = {} # here could be stuff on repo level, above archive level
static_keys.update(self.FIXED_KEYS)
self.format = partial_format(format, static_keys)
self.format_keys = {f[1] for f in Formatter().parse(format)}
self.call_keys = {
"hostname": partial(self.get_meta, "hostname", ""),
@ -755,20 +756,12 @@ class ArchiveFormatter(BaseFormatter):
"end": self.get_ts_end,
}
self.used_call_keys = set(self.call_keys) & self.format_keys
if self.json:
self.item_data = {}
self.format_item = self.format_item_json
else:
self.item_data = static_keys
def format_item_json(self, item):
return json.dumps(self.get_item_data(item), cls=BorgJsonEncoder) + "\n"
def get_item_data(self, archive_info):
def get_item_data(self, archive_info, jsonline=False):
self.name = archive_info.name
self.id = archive_info.id
item_data = {}
item_data.update(self.item_data)
item_data.update({} if jsonline else self.static_data)
item_data.update(
{
"name": archive_info.name,
@ -812,15 +805,31 @@ class ItemFormatter(BaseFormatter):
# shake_* is not provided because it uses an incompatible .digest() method to support variable length.
hash_algorithms = set(hashlib.algorithms_guaranteed).union({"xxh64"}).difference({"shake_128", "shake_256"})
KEY_DESCRIPTIONS = {
"type": "file type (file, dir, symlink, ...)",
"mode": "file mode (as in stat)",
"uid": "user id of file owner",
"gid": "group id of file owner",
"user": "user name of file owner",
"group": "group name of file owner",
"path": "file path",
"target": "link target for symlinks",
"hlid": "hard link identity (same if hardlinking same fs object)",
"flags": "file flags",
"extra": 'prepends {target} with " -> " for soft links and " link to " for hard links',
"size": "file size",
"dsize": "deduplicated size",
"num_chunks": "number of chunks in this file",
"unique_chunks": "number of unique chunks in this file",
"mtime": "file modification time",
"ctime": "file change time",
"atime": "file access time",
"isomtime": "file modification time (ISO 8601 format)",
"isoctime": "file change time (ISO 8601 format)",
"isoatime": "file access time (ISO 8601 format)",
"xxh64": "XXH64 checksum of this file (note: this is NOT a cryptographic hash!)",
"health": 'either "healthy" (file ok) or "broken" (if file has all-zero replacement chunks)',
"archiveid": "internal ID of the archive",
"archivename": "name of the archive",
}
KEY_GROUPS = (
("type", "mode", "uid", "gid", "user", "group", "path", "target", "hlid", "flags"),
@ -833,57 +842,19 @@ class ItemFormatter(BaseFormatter):
KEYS_REQUIRING_CACHE = ("dsize", "unique_chunks")
@classmethod
def available_keys(cls):
class FakeArchive:
fpr = name = ""
from ..item import Item
fake_item = Item(mode=0, path="foo", user="", group="", mtime=0, uid=0, gid=0)
formatter = cls(FakeArchive, "")
keys = []
keys.extend(formatter.call_keys.keys())
keys.extend(formatter.get_item_data(fake_item).keys())
return keys
@classmethod
def keys_help(cls):
help = []
keys = cls.available_keys()
for key in cls.FIXED_KEYS:
keys.remove(key)
for group in cls.KEY_GROUPS:
for key in group:
keys.remove(key)
text = "- " + key
if key in cls.KEY_DESCRIPTIONS:
text += ": " + cls.KEY_DESCRIPTIONS[key]
help.append(text)
help.append("")
assert not keys, str(keys)
return "\n".join(help)
@classmethod
def format_needs_cache(cls, format):
format_keys = {f[1] for f in Formatter().parse(format)}
return any(key in cls.KEYS_REQUIRING_CACHE for key in format_keys)
def __init__(self, archive, format, *, json_lines=False):
def __init__(self, archive, format):
from ..checksums import StreamingXXH64
static_data = {"archivename": archive.name, "archiveid": archive.fpr}
static_data.update(self.FIXED_KEYS)
super().__init__(format, static_data)
self.xxh64 = StreamingXXH64
self.archive = archive
self.json_lines = json_lines
static_keys = {"archivename": archive.name, "archiveid": archive.fpr}
static_keys.update(self.FIXED_KEYS)
if self.json_lines:
self.item_data = {}
self.format_item = self.format_item_json
else:
self.item_data = static_keys
self.format = partial_format(format, static_keys)
self.format_keys = {f[1] for f in Formatter().parse(format)}
self.call_keys = {
"size": self.calculate_size,
@ -901,17 +872,14 @@ class ItemFormatter(BaseFormatter):
self.call_keys[hash_function] = partial(self.hash_item, hash_function)
self.used_call_keys = set(self.call_keys) & self.format_keys
def format_item_json(self, item):
return json.dumps(self.get_item_data(item), cls=BorgJsonEncoder, sort_keys=True) + "\n"
def get_item_data(self, item):
def get_item_data(self, item, jsonline=False):
item_data = {}
item_data.update(self.item_data)
item_data.update({} if jsonline else self.static_data)
item_data.update(text_to_json("path", item.path))
target = item.get("target", "")
item_data.update(text_to_json("target", target))
if not self.json_lines:
if not jsonline:
item_data["extra"] = "" if not target else f" -> {item_data['target']}"
hlid = item.get("hlid")
@ -928,7 +896,7 @@ class ItemFormatter(BaseFormatter):
item_data.update(text_to_json("user", item.get("user", str(item_data["uid"]))))
item_data.update(text_to_json("group", item.get("group", str(item_data["gid"]))))
if self.json_lines:
if jsonline:
item_data["healthy"] = "chunks_healthy" not in item
else:
item_data["health"] = "broken" if "chunks_healthy" in item else "healthy"
@ -944,7 +912,7 @@ class ItemFormatter(BaseFormatter):
item: The item to sum its unique chunks' metadata
metadata_func: A function that takes a parameter of type ChunkIndexEntry and returns a number, used to return
the metadata needed from the chunk
the metadata needed from the chunk
"""
chunk_index = self.archive.cache.chunks
chunks = item.get("chunks", [])
@ -976,6 +944,134 @@ class ItemFormatter(BaseFormatter):
return self.format_time(key, item).isoformat()
class DiffFormatter(BaseFormatter):
    """Formatter for ``borg diff`` output: renders one ItemDiff per line via a format string."""

    KEY_DESCRIPTIONS = {
        "path": "archived file path",
        "change": "all available changes",
        "content": "file content change",
        "mode": "file mode change",
        "type": "file type change",
        "owner": "file owner (user/group) change",
        "user": "file user change",
        "group": "file group change",
        "link": "file link change",
        "directory": "file directory change",
        "blkdev": "file block device change",
        "chrdev": "file character device change",
        "fifo": "file fifo change",
        "mtime": "file modification time change",
        "ctime": "file change time change",
        "isomtime": "file modification time change (ISO 8601)",
        "isoctime": "file change time change (ISO 8601)",
    }
    KEY_GROUPS = (
        ("path", "change"),
        ("content", "mode", "type", "owner", "group", "user"),
        ("link", "directory", "blkdev", "chrdev", "fifo"),
        ("mtime", "ctime", "isomtime", "isoctime"),
    )
    # keys suppressed when --content-only is given (metadata-only changes)
    METADATA = ("mode", "type", "owner", "group", "user", "mtime", "ctime")

    def __init__(self, format, content_only=False):
        """
        :param format: user-supplied format string; may be None/empty, in which case the
                       default diff format is used.
        :param content_only: if True, metadata change keys (see METADATA) are not rendered.
        """
        # Resolve the default once, *before* parsing: the original parsed the raw
        # ``format`` argument, so ``Formatter().parse(None)`` raised TypeError whenever
        # the caller relied on the default that super().__init__ applied.
        format = format or "{content}{link}{directory}{blkdev}{chrdev}{fifo} {path}{NL}"
        static_data = {}
        static_data.update(self.FIXED_KEYS)
        super().__init__(format, static_data)
        self.content_only = content_only
        # names of all replacement fields referenced by the format string
        self.format_keys = {f[1] for f in Formatter().parse(format)}
        self.call_keys = {
            "content": self.format_content,
            "mode": self.format_mode,
            "type": partial(self.format_mode, filetype=True),
            "owner": self.format_owner,
            "group": partial(self.format_owner, spec="group"),
            "user": partial(self.format_owner, spec="user"),
            "link": partial(self.format_other, "link"),
            "directory": partial(self.format_other, "directory"),
            "blkdev": partial(self.format_other, "blkdev"),
            "chrdev": partial(self.format_other, "chrdev"),
            "fifo": partial(self.format_other, "fifo"),
            "mtime": partial(self.format_time, "mtime"),
            "ctime": partial(self.format_time, "ctime"),
            "isomtime": partial(self.format_iso_time, "mtime"),
            "isoctime": partial(self.format_iso_time, "ctime"),
        }
        # only evaluate the keys the format string actually uses
        self.used_call_keys = set(self.call_keys) & self.format_keys
        if self.content_only:
            self.used_call_keys -= set(self.METADATA)

    def get_item_data(self, item: "ItemDiff", jsonline=False) -> dict:
        """Build the mapping consumed by ``format.format_map`` (or the JSON line)."""
        diff_data = {}
        for key in self.used_call_keys:
            diff_data[key] = self.call_keys[key](item)
        # "{change}" aggregates every non-ISO change rendering (metadata filtered if requested)
        change = []
        for key in self.call_keys:
            if key in ("isomtime", "isoctime"):
                continue
            if self.content_only and key in self.METADATA:
                continue
            change.append(self.call_keys[key](item))
        diff_data["change"] = " ".join([v for v in change if v])
        diff_data["path"] = item.path
        diff_data.update({} if jsonline else self.static_data)
        return diff_data

    def format_other(self, key, diff: "ItemDiff"):
        """Render presence-style changes (link/directory/blkdev/chrdev/fifo)."""
        change = diff.changes().get(key)
        return f"{change.diff_type}".ljust(27) if change else ""  # 27 is the length of the content change

    def format_mode(self, diff: "ItemDiff", filetype=False):
        """Render a mode (or, with filetype=True, a file type) change as ``[old -> new]``."""
        change = diff.type() if filetype else diff.mode()
        return f"[{change.diff_data['item1']} -> {change.diff_data['item2']}]" if change else ""

    def format_owner(self, diff: "ItemDiff", spec: Literal["owner", "user", "group"] = "owner"):
        """Render owner change: user only, group only, or combined ``[user:group -> user:group]``."""
        if spec == "user":
            change = diff.user()
            return f"[{change.diff_data['item1']} -> {change.diff_data['item2']}]" if change else ""
        if spec == "group":
            change = diff.group()
            return f"[{change.diff_data['item1']} -> {change.diff_data['item2']}]" if change else ""
        if spec != "owner":
            raise ValueError(f"Invalid owner spec: {spec}")
        change = diff.owner()
        if change:
            return "[{}:{} -> {}:{}]".format(
                change.diff_data["item1"][0],
                change.diff_data["item1"][1],
                change.diff_data["item2"][0],
                change.diff_data["item2"][1],
            )
        return ""

    def format_content(self, diff: "ItemDiff"):
        """Render a content change with human-readable sizes (added/removed/modified)."""
        change = diff.content()
        if change:
            if change.diff_type == "added":
                return "{}: {:>20}".format(change.diff_type, format_file_size(change.diff_data["added"]))
            if change.diff_type == "removed":
                return "{}: {:>18}".format(change.diff_type, format_file_size(change.diff_data["removed"]))
            if "added" not in change.diff_data and "removed" not in change.diff_data:
                # chunker params differ between archives, so sizes could not be computed
                return "modified: (can't get size)"
            return "{}: {:>8} {:>8}".format(
                change.diff_type,
                format_file_size(change.diff_data["added"], precision=1, sign=True),
                format_file_size(-change.diff_data["removed"], precision=1, sign=True),
            )
        return ""

    def format_time(self, key, diff: "ItemDiff"):
        """Render an mtime/ctime change using the stored datetime reprs."""
        change = diff.changes().get(key)
        return f"[{key}: {change.diff_data['item1']} -> {change.diff_data['item2']}]" if change else ""

    def format_iso_time(self, key, diff: "ItemDiff"):
        """Render an mtime/ctime change in ISO 8601 form."""
        change = diff.changes().get(key)
        return (
            f"[{key}: {change.diff_data['item1'].isoformat()} -> {change.diff_data['item2'].isoformat()}]"
            if change
            else ""
        )
def file_status(mode):
if stat.S_ISREG(mode):
return "A"

View File

@ -1,4 +1,4 @@
from typing import FrozenSet, Set, NamedTuple, Tuple, Mapping, Dict, List, Iterator, Callable, Any
from typing import FrozenSet, Set, NamedTuple, Tuple, Mapping, Dict, List, Iterator, Callable, Any, Optional
from .helpers import StableDict
@ -247,9 +247,36 @@ class ManifestItem(PropDict):
@item_keys.setter
def item_keys(self, val: Tuple) -> None: ...
class DiffChange:
    # Type stub for the DiffChange class implemented in the compiled item module
    # (stores one change of an item diff; presumably mirrors item.pyx — verify there).
    diff_type: str
    diff_data: Dict[str, Any]
    def __init__(self, diff_type: str, diff_data: Optional[Dict[str, Any]] = ...) -> None: ...
    def to_dict(self) -> Dict[str, Any]: ...
class ItemDiff:
def __init__(self, *args, **kwargs) -> None: ...
def _chunk_content_equal(self, c1: Iterator, c2: Iterator) -> bool: ...
path: str
def __init__(
self,
path: str,
item1: Item,
item2: Item,
chunk_1: Iterator,
chunk_2: Iterator,
numeric_ids: bool = ...,
can_compare_chunk_ids: bool = ...,
) -> None: ...
def changes(self) -> Dict[str, DiffChange]: ...
def equal(self, content_only: bool = ...) -> bool: ...
def content(self) -> Optional[DiffChange]: ...
def ctime(self) -> Optional[DiffChange]: ...
def mtime(self) -> Optional[DiffChange]: ...
def mode(self) -> Optional[DiffChange]: ...
def type(self) -> Optional[DiffChange]: ...
def owner(self) -> Optional[DiffChange]: ...
def user(self) -> Optional[DiffChange]: ...
def group(self) -> Optional[DiffChange]: ...
def chunk_content_equal(chunks_a: Iterator, chunks_b: Iterator) -> bool: ...
class Key(PropDict):
@property

View File

@ -620,66 +620,76 @@ cpdef _init_names():
_init_names()
class DiffChange:
    """
    A single change detected while diffing two archive items.

    ``diff_type`` names the kind of change, e.g. "added", "removed", "modified".
    ``diff_data`` carries optional extra detail about it, e.g. the old and new mode.
    """

    def __init__(self, diff_type, diff_data=None):
        self.diff_type = diff_type
        self.diff_data = diff_data or {}

    def to_dict(self):
        # serialize with the change kind first, then any detail fields
        result = {"type": self.diff_type}
        result.update(self.diff_data)
        return result
class ItemDiff:
"""
Comparison of two items from different archives.
The items may have different paths and still be considered equal (e.g. for renames).
It does not include extended or time attributes in the comparison.
"""
def __init__(self, item1, item2, chunk_iterator1, chunk_iterator2, numeric_ids=False, can_compare_chunk_ids=False, content_only=False):
def __init__(self, path, item1, item2, chunk_1, chunk_2, numeric_ids=False, can_compare_chunk_ids=False):
self.path = path
self._item1 = item1
self._item2 = item2
self._content_only = content_only
self._numeric_ids = numeric_ids
self._can_compare_chunk_ids = can_compare_chunk_ids
self.equal = self._equal(chunk_iterator1, chunk_iterator2)
changes = []
self._chunk_1 = chunk_1
self._chunk_2 = chunk_2
self._changes = {}
if self._item1.is_link() or self._item2.is_link():
changes.append(self._link_diff())
self._link_diff()
if 'chunks' in self._item1 and 'chunks' in self._item2:
changes.append(self._content_diff())
self._content_diff()
if self._item1.is_dir() or self._item2.is_dir():
changes.append(self._presence_diff('directory'))
self._presence_diff('directory')
if self._item1.is_blk() or self._item2.is_blk():
changes.append(self._presence_diff('blkdev'))
self._presence_diff('blkdev')
if self._item1.is_chr() or self._item2.is_chr():
changes.append(self._presence_diff('chrdev'))
self._presence_diff('chrdev')
if self._item1.is_fifo() or self._item2.is_fifo():
changes.append(self._presence_diff('fifo'))
self._presence_diff('fifo')
if not self._content_only:
if not (self._item1.get('deleted') or self._item2.get('deleted')):
changes.append(self._owner_diff())
changes.append(self._mode_diff())
changes.extend(self._time_diffs())
if not (self._item1.get('deleted') or self._item2.get('deleted')):
self._owner_diff()
self._mode_diff()
self._time_diffs()
# filter out empty changes
self._changes = [ch for ch in changes if ch]
def changes(self):
return self._changes
def __repr__(self):
if self.equal:
return 'equal'
return ' '.join(str for d, str in self._changes)
return (' '.join(self._changes.keys())) or 'equal'
def _equal(self, chunk_iterator1, chunk_iterator2):
def equal(self, content_only=False):
# if both are deleted, there is nothing at path regardless of what was deleted
if self._item1.get('deleted') and self._item2.get('deleted'):
return True
attr_list = ['deleted', 'target']
if not self._content_only:
if not content_only:
attr_list += ['mode', 'ctime', 'mtime']
attr_list += ['uid', 'gid'] if self._numeric_ids else ['user', 'group']
@ -693,74 +703,107 @@ class ItemDiff:
return False
if 'chunks' in self._item1 and 'chunks' in self._item2:
return self._content_equal(chunk_iterator1, chunk_iterator2)
return self._content_equal()
return True
def _presence_diff(self, item_type):
if not self._item1.get('deleted') and self._item2.get('deleted'):
chg = 'removed ' + item_type
return ({"type": chg}, chg)
self._changes[item_type] = DiffChange(f"removed {item_type}")
return True
if self._item1.get('deleted') and not self._item2.get('deleted'):
chg = 'added ' + item_type
return ({"type": chg}, chg)
self._changes[item_type] = DiffChange(f"added {item_type}")
return True
def _link_diff(self):
pd = self._presence_diff('link')
if pd is not None:
return pd
if self._presence_diff('link'):
return True
if 'target' in self._item1 and 'target' in self._item2 and self._item1.target != self._item2.target:
return ({"type": 'changed link'}, 'changed link')
self._changes['link'] = DiffChange('changed link')
return True
def _content_diff(self):
if self._item1.get('deleted'):
sz = self._item2.get_size()
return ({"type": "added", "size": sz}, 'added {:>13}'.format(format_file_size(sz)))
self._changes['content'] = DiffChange("added", {"added": sz, "removed": 0})
return True
if self._item2.get('deleted'):
sz = self._item1.get_size()
return ({"type": "removed", "size": sz}, 'removed {:>11}'.format(format_file_size(sz)))
self._changes['content'] = DiffChange("removed", {"added": 0, "removed": sz})
return True
if not self._can_compare_chunk_ids:
return ({"type": "modified"}, "modified")
self._changes['content'] = DiffChange("modified")
return True
chunk_ids1 = {c.id for c in self._item1.chunks}
chunk_ids2 = {c.id for c in self._item2.chunks}
added_ids = chunk_ids2 - chunk_ids1
removed_ids = chunk_ids1 - chunk_ids2
added = self._item2.get_size(consider_ids=added_ids)
removed = self._item1.get_size(consider_ids=removed_ids)
return ({"type": "modified", "added": added, "removed": removed},
'{:>9} {:>9}'.format(format_file_size(added, precision=1, sign=True),
format_file_size(-removed, precision=1, sign=True)))
self._changes['content'] = DiffChange("modified", {"added": added, "removed": removed})
return True
def _owner_diff(self):
u_attr, g_attr = ('uid', 'gid') if self._numeric_ids else ('user', 'group')
u1, g1 = self._item1.get(u_attr), self._item1.get(g_attr)
u2, g2 = self._item2.get(u_attr), self._item2.get(g_attr)
if (u1, g1) != (u2, g2):
return ({"type": "owner", "old_user": u1, "old_group": g1, "new_user": u2, "new_group": g2},
'[{}:{} -> {}:{}]'.format(u1, g1, u2, g2))
if (u1, g1) == (u2, g2):
return False
self._changes['owner'] = DiffChange("changed owner", {"item1": (u1, g1), "item2": (u2, g2)})
if u1 != u2:
self._changes['user'] = DiffChange("changed user", {"item1": u1, "item2": u2})
if g1 != g2:
self._changes['group'] = DiffChange("changed group", {"item1": g1, "item2": g2})
return True
def _mode_diff(self):
if 'mode' in self._item1 and 'mode' in self._item2 and self._item1.mode != self._item2.mode:
mode1 = stat.filemode(self._item1.mode)
mode2 = stat.filemode(self._item2.mode)
return ({"type": "mode", "old_mode": mode1, "new_mode": mode2}, '[{} -> {}]'.format(mode1, mode2))
self._changes['mode'] = DiffChange("changed mode", {"item1": mode1, "item2": mode2})
if mode1[0] != mode2[0]:
self._changes['type'] = DiffChange("changed type", {"item1": mode1[0], "item2": mode2[0]})
def _time_diffs(self):
changes = []
attrs = ["ctime", "mtime"]
for attr in attrs:
if attr in self._item1 and attr in self._item2 and self._item1.get(attr) != self._item2.get(attr):
ts1 = OutputTimestamp(safe_timestamp(self._item1.get(attr)))
ts2 = OutputTimestamp(safe_timestamp(self._item2.get(attr)))
changes.append(({"type": attr, f"old_{attr}": ts1, f"new_{attr}": ts2}, '[{}: {} -> {}]'.format(attr, ts1, ts2)))
return changes
self._changes[attr] = DiffChange(attr, {"item1": ts1, "item2": ts2},)
return True
def _content_equal(self, chunk_iterator1, chunk_iterator2):
def content(self):
return self._changes.get('content')
def ctime(self):
return self._changes.get('ctime')
def mtime(self):
return self._changes.get('mtime')
def mode(self):
return self._changes.get('mode')
def type(self):
return self._changes.get('type')
def owner(self):
return self._changes.get('owner')
def user(self):
return self._changes.get('user')
def group(self):
return self._changes.get('group')
def _content_equal(self):
if self._can_compare_chunk_ids:
return self._item1.chunks == self._item2.chunks
if self._item1.get_size() != self._item2.get_size():
return False
return chunks_contents_equal(chunk_iterator1, chunk_iterator2)
return chunks_contents_equal(self._chunk_1, self._chunk_2)
def chunks_contents_equal(chunks_a, chunks_b):

View File

@ -72,22 +72,20 @@ class ArchiverTestCase(ArchiverTestCaseBase):
self.cmd(f"--repo={self.repository_location}", "create", "test1b", "input", "--chunker-params", "16,18,17,4095")
def do_asserts(output, can_compare_ids, content_only=False):
# File contents changed (deleted and replaced with a new file)
change = "B" if can_compare_ids else "{:<19}".format("modified")
lines = output.splitlines()
lines: list = output.splitlines()
assert "file_replaced" in output # added to debug #3494
change = "modified.*B" if can_compare_ids else r"modified: \(can't get size\)"
self.assert_line_exists(lines, f"{change}.*input/file_replaced")
# File unchanged
assert "input/file_unchanged" not in output
# Directory replaced with a regular file
if "BORG_TESTS_IGNORE_MODES" not in os.environ and not is_win32 and not content_only:
self.assert_line_exists(lines, "drwxr-xr-x -> -rwxr-xr-x.*input/dir_replaced_with_file")
self.assert_line_exists(lines, "[drwxr-xr-x -> -rwxr-xr-x].*input/dir_replaced_with_file")
# Basic directory cases
assert "added directory input/dir_added" in output
assert "removed directory input/dir_removed" in output
assert "added directory input/dir_added" in output
assert "removed directory input/dir_removed" in output
if are_symlinks_supported():
# Basic symlink cases
@ -96,8 +94,9 @@ class ArchiverTestCase(ArchiverTestCaseBase):
self.assert_line_exists(lines, "removed link.*input/link_removed")
# Symlink replacing or being replaced
assert "input/dir_replaced_with_link" in output
assert "input/link_replaced_by_file" in output
if not content_only:
assert "input/dir_replaced_with_link" in output
assert "input/link_replaced_by_file" in output
# Symlink target removed. Should not affect the symlink at all.
assert "input/link_target_removed" not in output
@ -105,7 +104,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
# The inode has two links and the file contents changed. Borg
# should notice the changes in both links. However, the symlink
# pointing to the file is not changed.
change = "0 B" if can_compare_ids else "{:<19}".format("modified")
change = "modified.*0 B" if can_compare_ids else r"modified: \(can't get size\)"
self.assert_line_exists(lines, f"{change}.*input/empty")
if are_hardlinks_supported():
self.assert_line_exists(lines, f"{change}.*input/hardlink_contents_changed")
@ -114,18 +113,18 @@ class ArchiverTestCase(ArchiverTestCaseBase):
# Added a new file and a hard link to it. Both links to the same
# inode should appear as separate files.
assert "added 2.05 kB input/file_added" in output
assert "added: 2.05 kB input/file_added" in output
if are_hardlinks_supported():
assert "added 2.05 kB input/hardlink_added" in output
assert "added: 2.05 kB input/hardlink_added" in output
# check if a diff between nonexistent and empty new file is found
assert "added 0 B input/file_empty_added" in output
assert "added: 0 B input/file_empty_added" in output
# The inode has two links and both of them are deleted. They should
# appear as two deleted files.
assert "removed 256 B input/file_removed" in output
assert "removed: 256 B input/file_removed" in output
if are_hardlinks_supported():
assert "removed 256 B input/hardlink_removed" in output
assert "removed: 256 B input/hardlink_removed" in output
if are_hardlinks_supported() and content_only:
# Another link (marked previously as the source in borg) to the
@ -143,7 +142,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
chgsets = [j["changes"] for j in data if j["path"] == filename]
assert len(chgsets) < 2
# return a flattened list of changes for given filename
return [chg for chgset in chgsets for chg in chgset]
return sum(chgsets, [])
# convert output to list of dicts
joutput = [json.loads(line) for line in output.split("\n") if line]
@ -157,7 +156,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
# Directory replaced with a regular file
if "BORG_TESTS_IGNORE_MODES" not in os.environ and not is_win32 and not content_only:
assert {"type": "mode", "old_mode": "drwxr-xr-x", "new_mode": "-rwxr-xr-x"} in get_changes(
assert {"type": "changed mode", "item1": "drwxr-xr-x", "item2": "-rwxr-xr-x"} in get_changes(
"input/dir_replaced_with_file", joutput
)
@ -175,11 +174,11 @@ class ArchiverTestCase(ArchiverTestCaseBase):
if not content_only:
assert any(
chg["type"] == "mode" and chg["new_mode"].startswith("l")
chg["type"] == "changed mode" and chg["item1"].startswith("d") and chg["item2"].startswith("l")
for chg in get_changes("input/dir_replaced_with_link", joutput)
), get_changes("input/dir_replaced_with_link", joutput)
assert any(
chg["type"] == "mode" and chg["old_mode"].startswith("l")
chg["type"] == "changed mode" and chg["item1"].startswith("l") and chg["item2"].startswith("-")
for chg in get_changes("input/link_replaced_by_file", joutput)
), get_changes("input/link_replaced_by_file", joutput)
@ -198,18 +197,18 @@ class ArchiverTestCase(ArchiverTestCaseBase):
# Added a new file and a hard link to it. Both links to the same
# inode should appear as separate files.
assert {"type": "added", "size": 2048} in get_changes("input/file_added", joutput)
assert {"added": 2048, "removed": 0, "type": "added"} in get_changes("input/file_added", joutput)
if are_hardlinks_supported():
assert {"type": "added", "size": 2048} in get_changes("input/hardlink_added", joutput)
assert {"added": 2048, "removed": 0, "type": "added"} in get_changes("input/hardlink_added", joutput)
# check if a diff between nonexistent and empty new file is found
assert {"type": "added", "size": 0} in get_changes("input/file_empty_added", joutput)
assert {"added": 0, "removed": 0, "type": "added"} in get_changes("input/file_empty_added", joutput)
# The inode has two links and both of them are deleted. They should
# appear as two deleted files.
assert {"type": "removed", "size": 256} in get_changes("input/file_removed", joutput)
assert {"added": 0, "removed": 256, "type": "removed"} in get_changes("input/file_removed", joutput)
if are_hardlinks_supported():
assert {"type": "removed", "size": 256} in get_changes("input/hardlink_removed", joutput)
assert {"added": 0, "removed": 256, "type": "removed"} in get_changes("input/hardlink_removed", joutput)
if are_hardlinks_supported() and content_only:
# Another link (marked previously as the source in borg) to the
@ -251,14 +250,28 @@ class ArchiverTestCase(ArchiverTestCaseBase):
time.sleep(1) # HFS has a 1s timestamp granularity
self.create_regular_file("test_file", size=15)
self.cmd(f"--repo={self.repository_location}", "create", "archive2", "input")
output = self.cmd(f"--repo={self.repository_location}", "diff", "archive1", "archive2")
output = self.cmd(
f"--repo={self.repository_location}",
"diff",
"archive1",
"archive2",
"--format",
"'{mtime}{ctime} {path}{NL}'",
)
self.assert_in("mtime", output)
self.assert_in("ctime", output) # Should show up on windows as well since it is a new file.
if is_darwin:
time.sleep(1) # HFS has a 1s timestamp granularity
os.chmod("input/test_file", 0o777)
self.cmd(f"--repo={self.repository_location}", "create", "archive3", "input")
output = self.cmd(f"--repo={self.repository_location}", "diff", "archive2", "archive3")
output = self.cmd(
f"--repo={self.repository_location}",
"diff",
"archive2",
"archive3",
"--format",
"'{mtime}{ctime} {path}{NL}'",
)
self.assert_not_in("mtime", output)
# Checking platform because ctime should not be shown on windows since it wasn't recreated.
if not is_win32:
@ -294,7 +307,10 @@ class ArchiverTestCase(ArchiverTestCaseBase):
"e_file_changed",
"f_file_removed",
]
assert all(x in line for x, line in zip(expected, output.splitlines()))
assert isinstance(output, str)
outputs = output.splitlines()
assert len(outputs) == len(expected)
assert all(x in line for x, line in zip(expected, outputs))
class RemoteArchiverTestCase(RemoteArchiverTestCaseBase, ArchiverTestCase):