mirror of https://github.com/borgbackup/borg.git
diff: add --format option also: refactoring/improvements of BaseFormatter
This commit is contained in:
parent 8506c05ab6
commit 616d5e7330
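
In short: ``borg diff`` gains a ``--format`` option (plus a ``BORG_DIFF_FORMAT`` environment variable fallback and a content-only default), built on the same placeholder machinery that ``borg list`` already uses. A usage sketch, taken from the help text added further down in this commit (the archive names are only illustrative):

    $ borg diff --format '{content:30} {path}{NL}' ArchiveFoo ArchiveBar
    modified: +4.1 kB -1.0 kB file-diff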
@@ -11,6 +11,7 @@ from functools import partial
from getpass import getuser
from io import BytesIO
from itertools import groupby, zip_longest
from typing import Iterator
from shutil import get_terminal_size

from .platformflags import is_win32

@@ -297,31 +298,24 @@ class DownloadPipeline:
        unpacker = msgpack.Unpacker(use_list=False)
        for data in self.fetch_many(ids):
            unpacker.feed(data)
            items = [Item(internal_dict=item) for item in unpacker]
            for item in items:
            for _item in unpacker:
                item = Item(internal_dict=_item)
                if "chunks" in item:
                    item.chunks = [ChunkListEntry(*e) for e in item.chunks]

            if filter:
                items = [item for item in items if filter(item)]

            if preload:
                for item in items:
                    if "chunks" in item:
                        hlid = item.get("hlid", None)
                        if hlid is None:
                            preload_chunks = True
                        else:
                            if hlid in hlids_preloaded:
                                preload_chunks = False
                            else:
                                # not having the hardlink's chunks already preloaded for other hardlink to same inode
                                preload_chunks = True
                            hlids_preloaded.add(hlid)
                        if preload_chunks:
                            self.repository.preload([c.id for c in item.chunks])

            for item in items:
                if filter and not filter(item):
                    continue
                if preload and "chunks" in item:
                    hlid = item.get("hlid", None)
                    if hlid is None:
                        preload_chunks = True
                    elif hlid in hlids_preloaded:
                        preload_chunks = False
                    else:
                        # not having the hardlink's chunks already preloaded for other hardlink to same inode
                        preload_chunks = True
                    hlids_preloaded.add(hlid)
                    if preload_chunks:
                        self.repository.preload([c.id for c in item.chunks])
                yield item

    def fetch_many(self, ids, is_preloaded=False):

@@ -631,10 +625,9 @@ Duration: {0.duration}
    def iter_items(self, filter=None, preload=False):
        # note: when calling this with preload=True, later fetch_many() must be called with
        # is_preloaded=True or the RemoteRepository code will leak memory!
        for item in self.pipeline.unpack_many(
        yield from self.pipeline.unpack_many(
            self.metadata.items, preload=preload, filter=lambda item: self.item_filter(item, filter)
        ):
            yield item
        )

    def add_item(self, item, show_progress=True, stats=None):
        if show_progress and self.show_progress:
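
The memory-leak note in iter_items() above states a contract rather than showing it; a hypothetical consumer pairing the two flags correctly could look like this sketch (my_filter and process are placeholders, not part of this commit):

    for item in archive.iter_items(filter=my_filter, preload=True):
        if "chunks" in item:
            # iter_items() was called with preload=True, so fetch_many() must be told the
            # chunks are already preloaded, otherwise RemoteRepository keeps them and leaks memory.
            for data in archive.pipeline.fetch_many([c.id for c in item.chunks], is_preloaded=True):
                process(data)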
@@ -1123,55 +1116,59 @@ Duration: {0.duration}
            logger.warning("borg check --repair is required to free all space.")

    @staticmethod
    def compare_archives_iter(archive1, archive2, matcher=None, can_compare_chunk_ids=False, content_only=False):
    def compare_archives_iter(
        archive1: "Archive", archive2: "Archive", matcher=None, can_compare_chunk_ids=False
    ) -> Iterator[ItemDiff]:
        """
        Yields tuples with a path and an ItemDiff instance describing changes/indicating equality.
        Yields an ItemDiff instance describing changes/indicating equality.

        :param matcher: PatternMatcher class to restrict results to only matching paths.
        :param can_compare_chunk_ids: Whether --chunker-params are the same for both archives.
        """

        def compare_items(item1, item2):
        def compare_items(path: str, item1: Item, item2: Item):
            return ItemDiff(
                path,
                item1,
                item2,
                archive1.pipeline.fetch_many([c.id for c in item1.get("chunks", [])]),
                archive2.pipeline.fetch_many([c.id for c in item2.get("chunks", [])]),
                can_compare_chunk_ids=can_compare_chunk_ids,
                content_only=content_only,
            )

        orphans_archive1 = OrderedDict()
        orphans_archive2 = OrderedDict()
        orphans_archive1: OrderedDict[str, Item] = OrderedDict()
        orphans_archive2: OrderedDict[str, Item] = OrderedDict()

        assert matcher is not None, "matcher must be set"

        for item1, item2 in zip_longest(
            archive1.iter_items(lambda item: matcher.match(item.path)),
            archive2.iter_items(lambda item: matcher.match(item.path)),
        ):
            if item1 and item2 and item1.path == item2.path:
                yield (item1.path, compare_items(item1, item2))
                yield compare_items(item1.path, item1, item2)
                continue
            if item1:
                matching_orphan = orphans_archive2.pop(item1.path, None)
                if matching_orphan:
                    yield (item1.path, compare_items(item1, matching_orphan))
                    yield compare_items(item1.path, item1, matching_orphan)
                else:
                    orphans_archive1[item1.path] = item1
            if item2:
                matching_orphan = orphans_archive1.pop(item2.path, None)
                if matching_orphan:
                    yield (matching_orphan.path, compare_items(matching_orphan, item2))
                    yield compare_items(matching_orphan.path, matching_orphan, item2)
                else:
                    orphans_archive2[item2.path] = item2
        # At this point orphans_* contain items that had no matching partner in the other archive
        for added in orphans_archive2.values():
            path = added.path
            deleted_item = Item.create_deleted(path)
            yield (path, compare_items(deleted_item, added))
            yield compare_items(path, deleted_item, added)
        for deleted in orphans_archive1.values():
            path = deleted.path
            deleted_item = Item.create_deleted(path)
            yield (path, compare_items(deleted, deleted_item))
            yield compare_items(path, deleted, deleted_item)


class MetadataCollector:
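
Since compare_archives_iter() now yields bare ItemDiff objects (which carry their own .path) instead of (path, ItemDiff) tuples, and the equality check moved into ItemDiff.equal(), a caller written against the new API would look roughly like this sketch (archive1, archive2 and matcher are assumed to be set up as in do_diff() below):

    for diff in Archive.compare_archives_iter(archive1, archive2, matcher, can_compare_chunk_ids=True):
        if not diff.equal(content_only=False):
            print(diff.path, list(diff.changes()))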

@@ -1,13 +1,14 @@
import argparse
import textwrap
import json
import sys
import os

from ._common import with_repository, with_archive, build_matcher
from ._common import with_repository, with_archive, build_matcher, Highlander
from ..archive import Archive
from ..constants import * # NOQA
from ..helpers import archivename_validator
from ..helpers import BaseFormatter, DiffFormatter, archivename_validator, BorgJsonEncoder
from ..manifest import Manifest
from ..helpers.parseformat import BorgJsonEncoder

from ..logger import create_logger

logger = create_logger()

@@ -18,14 +19,12 @@ class DiffMixIn:
    @with_archive
    def do_diff(self, args, repository, manifest, archive):
        """Diff contents of two archives"""

        def print_json_output(diff, path):
            print(json.dumps({"path": path, "changes": [j for j, str in diff]}, sort_keys=True, cls=BorgJsonEncoder))

        def print_text_output(diff, path):
            print("{:<19} {}".format(" ".join([str for j, str in diff]), path))

        print_output = print_json_output if args.json_lines else print_text_output
        if args.format is not None:
            format = args.format
        elif args.content_only:
            format = "{content}{link}{directory}{blkdev}{chrdev}{fifo} {path}{NL}"
        else:
            format = os.environ.get("BORG_DIFF_FORMAT", "{change} {path}{NL}")

        archive1 = archive
        archive2 = Archive(manifest, args.other_name)

@@ -43,17 +42,36 @@ class DiffMixIn:

        matcher = build_matcher(args.patterns, args.paths)

        diffs = Archive.compare_archives_iter(
            archive1, archive2, matcher, can_compare_chunk_ids=can_compare_chunk_ids, content_only=args.content_only
        diffs_iter = Archive.compare_archives_iter(
            archive1, archive2, matcher, can_compare_chunk_ids=can_compare_chunk_ids
        )
        # Conversion to string and filtering for diff.equal to save memory if sorting
        diffs = ((path, diff.changes()) for path, diff in diffs if not diff.equal)
        diffs = (diff for diff in diffs_iter if not diff.equal(args.content_only))

        if args.sort:
            diffs = sorted(diffs)
            diffs = sorted(diffs, key=lambda diff: diff.path)

        for path, diff in diffs:
            print_output(diff, path)
        formatter = DiffFormatter(format, args.content_only)
        for diff in diffs:
            if args.json_lines:
                print(
                    json.dumps(
                        {
                            "path": diff.path,
                            "changes": [
                                change.to_dict()
                                for name, change in diff.changes().items()
                                if not args.content_only or (name not in DiffFormatter.METADATA)
                            ],
                        },
                        sort_keys=True,
                        cls=BorgJsonEncoder,
                    )
                )
            else:
                res: str = formatter.format_item(diff)
                if res.strip():
                    sys.stdout.write(res)

        for pattern in matcher.get_unmatched_include_patterns():
            self.print_warning("Include pattern '%s' never matched.", pattern)
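
With --json-lines, each changed path becomes one JSON object whose "changes" entries come from DiffChange.to_dict(). Going by the test expectations near the end of this commit, an output line would look roughly like:

    {"changes": [{"added": 2048, "removed": 0, "type": "added"}], "path": "input/file_added"}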
@@ -64,25 +82,48 @@ class DiffMixIn:
        from ._common import process_epilog
        from ._common import define_exclusion_group

        diff_epilog = process_epilog(
            """
        This command finds differences (file contents, user/group/mode) between archives.
        diff_epilog = (
            process_epilog(
                """
        This command finds differences (file contents, metadata) between ARCHIVE1 and ARCHIVE2.

        A repository location and an archive name must be specified for REPO::ARCHIVE1.
        ARCHIVE2 is just another archive name in same repository (no repository location
        allowed).
        For more help on include/exclude patterns, see the :ref:`borg_patterns` command output.

        For archives created with Borg 1.1 or newer diff automatically detects whether
        the archives are created with the same chunker params. If so, only chunk IDs
        are compared, which is very fast.
        .. man NOTES

        For archives prior to Borg 1.1 chunk contents are compared by default.
        If you did not create the archives with different chunker params,
        pass ``--same-chunker-params``.
        Note that the chunker params changed from Borg 0.xx to 1.0.
        The FORMAT specifier syntax
        +++++++++++++++++++++++++++

        For more help on include/exclude patterns, see the :ref:`borg_patterns` command output.
        """
        The ``--format`` option uses python's `format string syntax
        <https://docs.python.org/3.9/library/string.html#formatstrings>`_.

        Examples:
        ::

            $ borg diff --format '{content:30} {path}{NL}' ArchiveFoo ArchiveBar
            modified: +4.1 kB -1.0 kB file-diff
            ...

            # {VAR:<NUMBER} - pad to NUMBER columns left-aligned.
            # {VAR:>NUMBER} - pad to NUMBER columns right-aligned.
            $ borg diff --format '{content:>30} {path}{NL}' ArchiveFoo ArchiveBar
            modified: +4.1 kB -1.0 kB file-diff
            ...

        The following keys are always available:


        """
            )
            + BaseFormatter.keys_help()
            + textwrap.dedent(
                """

        Keys available only when showing differences between archives:

        """
            )
            + DiffFormatter.keys_help()
        )
        subparser = subparsers.add_parser(
            "diff",

@@ -107,6 +148,13 @@ class DiffMixIn:
            help="Override check of chunker parameters.",
        )
        subparser.add_argument("--sort", dest="sort", action="store_true", help="Sort the output lines by file path.")
        subparser.add_argument(
            "--format",
            metavar="FORMAT",
            dest="format",
            action=Highlander,
            help='specify format for differences between archives (default: "{change} {path}{NL}")',
        )
        subparser.add_argument("--json-lines", action="store_true", help="Format output as JSON Lines. ")
        subparser.add_argument(
            "--content-only",
@@ -29,10 +29,9 @@ class ListMixIn:

        def _list_inner(cache):
            archive = Archive(manifest, args.name, cache=cache)

            formatter = ItemFormatter(archive, format, json_lines=args.json_lines)
            formatter = ItemFormatter(archive, format)
            for item in archive.iter_items(lambda item: matcher.match(item.path)):
                sys.stdout.write(formatter.format_item(item))
                sys.stdout.write(formatter.format_item(item, args.json_lines, sort=True))

        # Only load the cache if it will be used
        if ItemFormatter.format_needs_cache(format):

@@ -89,7 +89,7 @@ class PruneMixIn:
            format = "{archive}"
        else:
            format = os.environ.get("BORG_PRUNE_FORMAT", "{archive:<36} {time} [{id}]")
        formatter = ArchiveFormatter(format, repository, manifest, manifest.key, json=False, iec=args.iec)
        formatter = ArchiveFormatter(format, repository, manifest, manifest.key, iec=args.iec)

        checkpoint_re = r"\.checkpoint(\.\d+)?"
        archives_checkpoints = manifest.archives.list(

@@ -169,7 +169,7 @@ class PruneMixIn:
                or (args.list_pruned and archive in to_delete)
                or (args.list_kept and archive not in to_delete)
            ):
                list_logger.info(f"{log_message:<40} {formatter.format_item(archive)}")
                list_logger.info(f"{log_message:<40} {formatter.format_item(archive, jsonline=False)}")
        pi.finish()
        if sig_int:
            # Ctrl-C / SIGINT: do not checkpoint (commit) again, we already have a checkpoint in this case.

@@ -23,15 +23,15 @@ class RListMixIn:
            format = "{archive}{NL}"
        else:
            format = os.environ.get("BORG_RLIST_FORMAT", "{archive:<36} {time} [{id}]{NL}")
        formatter = ArchiveFormatter(format, repository, manifest, manifest.key, json=args.json, iec=args.iec)
        formatter = ArchiveFormatter(format, repository, manifest, manifest.key, iec=args.iec)

        output_data = []

        for archive_info in manifest.archives.list_considering(args):
            if args.json:
                output_data.append(formatter.get_item_data(archive_info))
                output_data.append(formatter.get_item_data(archive_info, args.json))
            else:
                sys.stdout.write(formatter.format_item(archive_info))
                sys.stdout.write(formatter.format_item(archive_info, args.json))

        if args.json:
            json_print(basic_json_data(manifest, extra={"archives": output_data}))
@@ -28,7 +28,7 @@ from .parseformat import sizeof_fmt, sizeof_fmt_iec, sizeof_fmt_decimal, Locatio
from .parseformat import format_line, replace_placeholders, PlaceholderError, relative_time_marker_validator
from .parseformat import format_archive, parse_stringified_list, clean_lines
from .parseformat import location_validator, archivename_validator, comment_validator
from .parseformat import BaseFormatter, ArchiveFormatter, ItemFormatter, file_status
from .parseformat import BaseFormatter, ArchiveFormatter, ItemFormatter, DiffFormatter, file_status
from .parseformat import swidth_slice, ellipsis_truncate
from .parseformat import BorgJsonEncoder, basic_json_data, json_print, json_dump, prepare_dump_dict
from .parseformat import Highlander, MakePathSafeAction

@@ -1,3 +1,4 @@
import abc
import argparse
import base64
import hashlib

@@ -8,6 +9,7 @@ import re
import shlex
import stat
import uuid
from typing import List, Dict, Set, Tuple, ClassVar, Any, TYPE_CHECKING, Literal
from binascii import hexlify
from collections import Counter, OrderedDict
from datetime import datetime, timezone

@@ -27,6 +29,9 @@ from .. import __version_tuple__ as borg_version_tuple
from ..constants import * # NOQA
from ..platformflags import is_win32

if TYPE_CHECKING:
    from ..item import ItemDiff


def bin_to_hex(binary):
    return hexlify(binary).decode("ascii")

@@ -649,8 +654,10 @@ def archivename_validator(text):
    return validate_text(text)


class BaseFormatter:
    FIXED_KEYS = {
class BaseFormatter(metaclass=abc.ABCMeta):
    format: str
    static_data: Dict[str, Any]
    FIXED_KEYS: ClassVar[Dict[str, str]] = {
        # Formatting aids
        "LF": "\n",
        "SPACE": " ",

@@ -660,25 +667,49 @@ class BaseFormatter:
        "NEWLINE": "\n",
        "NL": "\n", # \n is automatically converted to os.linesep on write
    }
    KEY_DESCRIPTIONS: ClassVar[Dict[str, str]] = {
        "NEWLINE": "OS dependent line separator",
        "NL": "alias of NEWLINE",
        "NUL": "NUL character for creating print0 / xargs -0 like output",
        "SPACE": "space character",
        "TAB": "tab character",
        "CR": "carriage return character",
        "LF": "line feed character",
    }
    KEY_GROUPS: ClassVar[Tuple[Tuple[str, ...], ...]] = (("NEWLINE", "NL", "NUL", "SPACE", "TAB", "CR", "LF"),)

    def get_item_data(self, item):
    def __init__(self, format: str, static: Dict[str, Any]) -> None:
        self.format = partial_format(format, static)
        self.static_data = static

    @abc.abstractmethod
    def get_item_data(self, item, jsonline=False) -> dict:
        raise NotImplementedError

    def format_item(self, item):
        return self.format.format_map(self.get_item_data(item))

    @staticmethod
    def keys_help():
    def format_item(self, item, jsonline=False, sort=False):
        data = self.get_item_data(item, jsonline)
        return (
            "- NEWLINE: OS dependent line separator\n"
            "- NL: alias of NEWLINE\n"
            "- NUL: NUL character for creating print0 / xargs -0 like output\n"
            "- SPACE\n"
            "- TAB\n"
            "- CR\n"
            "- LF"
            f"{json.dumps(data, cls=BorgJsonEncoder, sort_keys=sort)}\n" if jsonline else self.format.format_map(data)
        )

    @classmethod
    def keys_help(cls):
        help = []
        keys: Set[str] = set()
        keys.update(cls.KEY_DESCRIPTIONS.keys())
        keys.update(key for group in cls.KEY_GROUPS for key in group)

        for group in cls.KEY_GROUPS:
            for key in group:
                keys.remove(key)
                text = "- " + key
                if key in cls.KEY_DESCRIPTIONS:
                    text += ": " + cls.KEY_DESCRIPTIONS[key]
                help.append(text)
            help.append("")
        assert not keys, str(keys)
        return "\n".join(help)


class ArchiveFormatter(BaseFormatter):
    KEY_DESCRIPTIONS = {
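
The refactored BaseFormatter now owns the shared machinery (static keys folded into the format string, format_item() with JSON-lines support, and a keys_help() driven by KEY_DESCRIPTIONS / KEY_GROUPS), so a subclass only declares its key metadata and implements get_item_data(). A minimal, hypothetical subclass (not part of borg) to illustrate the contract:

    class MiniFormatter(BaseFormatter):
        KEY_DESCRIPTIONS = dict(BaseFormatter.KEY_DESCRIPTIONS, name="object name")
        KEY_GROUPS = BaseFormatter.KEY_GROUPS + (("name",),)

        def __init__(self, format):
            # static keys (NL, TAB, ...) get substituted into the format string up front
            super().__init__(format, dict(self.FIXED_KEYS))

        def get_item_data(self, item, jsonline=False) -> dict:
            data = {} if jsonline else dict(self.static_data)
            data["name"] = item["name"]
            return data

    fmt = MiniFormatter("{name}{NL}")
    fmt.format_item({"name": "example"})                            # -> "example\n"
    fmt.format_item({"name": "example"}, jsonline=True, sort=True)  # -> '{"name": "example"}\n'
    MiniFormatter.keys_help()                                       # text built from KEY_GROUPS / KEY_DESCRIPTIONS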
@@ -703,47 +734,17 @@ class ArchiveFormatter(BaseFormatter):
        ("size", "nfiles"),
    )

    @classmethod
    def available_keys(cls):
        from ..manifest import ArchiveInfo

        fake_archive_info = ArchiveInfo("archivename", b"\1" * 32, datetime(1970, 1, 1, tzinfo=timezone.utc))
        formatter = cls("", None, None, None)
        keys = []
        keys.extend(formatter.call_keys.keys())
        keys.extend(formatter.get_item_data(fake_archive_info).keys())
        return keys

    @classmethod
    def keys_help(cls):
        help = []
        keys = cls.available_keys()
        for key in cls.FIXED_KEYS:
            keys.remove(key)

        for group in cls.KEY_GROUPS:
            for key in group:
                keys.remove(key)
                text = "- " + key
                if key in cls.KEY_DESCRIPTIONS:
                    text += ": " + cls.KEY_DESCRIPTIONS[key]
                help.append(text)
            help.append("")
        assert not keys, str(keys)
        return "\n".join(help)

    def __init__(self, format, repository, manifest, key, *, json=False, iec=False):
    def __init__(self, format, repository, manifest, key, *, iec=False):
        static_data = {}  # here could be stuff on repo level, above archive level
        static_data.update(self.FIXED_KEYS)
        super().__init__(format, static_data)
        self.repository = repository
        self.manifest = manifest
        self.key = key
        self.name = None
        self.id = None
        self._archive = None
        self.json = json
        self.iec = iec
        static_keys = {}  # here could be stuff on repo level, above archive level
        static_keys.update(self.FIXED_KEYS)
        self.format = partial_format(format, static_keys)
        self.format_keys = {f[1] for f in Formatter().parse(format)}
        self.call_keys = {
            "hostname": partial(self.get_meta, "hostname", ""),
@@ -755,20 +756,12 @@ class ArchiveFormatter(BaseFormatter):
            "end": self.get_ts_end,
        }
        self.used_call_keys = set(self.call_keys) & self.format_keys
        if self.json:
            self.item_data = {}
            self.format_item = self.format_item_json
        else:
            self.item_data = static_keys

    def format_item_json(self, item):
        return json.dumps(self.get_item_data(item), cls=BorgJsonEncoder) + "\n"

    def get_item_data(self, archive_info):
    def get_item_data(self, archive_info, jsonline=False):
        self.name = archive_info.name
        self.id = archive_info.id
        item_data = {}
        item_data.update(self.item_data)
        item_data.update({} if jsonline else self.static_data)
        item_data.update(
            {
                "name": archive_info.name,
@@ -812,15 +805,31 @@ class ItemFormatter(BaseFormatter):
    # shake_* is not provided because it uses an incompatible .digest() method to support variable length.
    hash_algorithms = set(hashlib.algorithms_guaranteed).union({"xxh64"}).difference({"shake_128", "shake_256"})
    KEY_DESCRIPTIONS = {
        "type": "file type (file, dir, symlink, ...)",
        "mode": "file mode (as in stat)",
        "uid": "user id of file owner",
        "gid": "group id of file owner",
        "user": "user name of file owner",
        "group": "group name of file owner",
        "path": "file path",
        "target": "link target for symlinks",
        "hlid": "hard link identity (same if hardlinking same fs object)",
        "flags": "file flags",
        "extra": 'prepends {target} with " -> " for soft links and " link to " for hard links',
        "size": "file size",
        "dsize": "deduplicated size",
        "num_chunks": "number of chunks in this file",
        "unique_chunks": "number of unique chunks in this file",
        "mtime": "file modification time",
        "ctime": "file change time",
        "atime": "file access time",
        "isomtime": "file modification time (ISO 8601 format)",
        "isoctime": "file change time (ISO 8601 format)",
        "isoatime": "file access time (ISO 8601 format)",
        "xxh64": "XXH64 checksum of this file (note: this is NOT a cryptographic hash!)",
        "health": 'either "healthy" (file ok) or "broken" (if file has all-zero replacement chunks)',
        "archiveid": "internal ID of the archive",
        "archivename": "name of the archive",
    }
    KEY_GROUPS = (
        ("type", "mode", "uid", "gid", "user", "group", "path", "target", "hlid", "flags"),

@@ -833,57 +842,19 @@ class ItemFormatter(BaseFormatter):

    KEYS_REQUIRING_CACHE = ("dsize", "unique_chunks")

    @classmethod
    def available_keys(cls):
        class FakeArchive:
            fpr = name = ""

        from ..item import Item

        fake_item = Item(mode=0, path="foo", user="", group="", mtime=0, uid=0, gid=0)
        formatter = cls(FakeArchive, "")
        keys = []
        keys.extend(formatter.call_keys.keys())
        keys.extend(formatter.get_item_data(fake_item).keys())
        return keys

    @classmethod
    def keys_help(cls):
        help = []
        keys = cls.available_keys()
        for key in cls.FIXED_KEYS:
            keys.remove(key)

        for group in cls.KEY_GROUPS:
            for key in group:
                keys.remove(key)
                text = "- " + key
                if key in cls.KEY_DESCRIPTIONS:
                    text += ": " + cls.KEY_DESCRIPTIONS[key]
                help.append(text)
            help.append("")
        assert not keys, str(keys)
        return "\n".join(help)

    @classmethod
    def format_needs_cache(cls, format):
        format_keys = {f[1] for f in Formatter().parse(format)}
        return any(key in cls.KEYS_REQUIRING_CACHE for key in format_keys)

    def __init__(self, archive, format, *, json_lines=False):
    def __init__(self, archive, format):
        from ..checksums import StreamingXXH64

        static_data = {"archivename": archive.name, "archiveid": archive.fpr}
        static_data.update(self.FIXED_KEYS)
        super().__init__(format, static_data)
        self.xxh64 = StreamingXXH64
        self.archive = archive
        self.json_lines = json_lines
        static_keys = {"archivename": archive.name, "archiveid": archive.fpr}
        static_keys.update(self.FIXED_KEYS)
        if self.json_lines:
            self.item_data = {}
            self.format_item = self.format_item_json
        else:
            self.item_data = static_keys
        self.format = partial_format(format, static_keys)
        self.format_keys = {f[1] for f in Formatter().parse(format)}
        self.call_keys = {
            "size": self.calculate_size,
@@ -901,17 +872,14 @@ class ItemFormatter(BaseFormatter):
            self.call_keys[hash_function] = partial(self.hash_item, hash_function)
        self.used_call_keys = set(self.call_keys) & self.format_keys

    def format_item_json(self, item):
        return json.dumps(self.get_item_data(item), cls=BorgJsonEncoder, sort_keys=True) + "\n"

    def get_item_data(self, item):
    def get_item_data(self, item, jsonline=False):
        item_data = {}
        item_data.update(self.item_data)
        item_data.update({} if jsonline else self.static_data)

        item_data.update(text_to_json("path", item.path))
        target = item.get("target", "")
        item_data.update(text_to_json("target", target))
        if not self.json_lines:
        if not jsonline:
            item_data["extra"] = "" if not target else f" -> {item_data['target']}"

        hlid = item.get("hlid")

@@ -928,7 +896,7 @@ class ItemFormatter(BaseFormatter):
        item_data.update(text_to_json("user", item.get("user", str(item_data["uid"]))))
        item_data.update(text_to_json("group", item.get("group", str(item_data["gid"]))))

        if self.json_lines:
        if jsonline:
            item_data["healthy"] = "chunks_healthy" not in item
        else:
            item_data["health"] = "broken" if "chunks_healthy" in item else "healthy"

@@ -944,7 +912,7 @@ class ItemFormatter(BaseFormatter):

        item: The item to sum its unique chunks' metadata
        metadata_func: A function that takes a parameter of type ChunkIndexEntry and returns a number, used to return
        the metadata needed from the chunk
            the metadata needed from the chunk
        """
        chunk_index = self.archive.cache.chunks
        chunks = item.get("chunks", [])
@@ -976,6 +944,134 @@ class ItemFormatter(BaseFormatter):
        return self.format_time(key, item).isoformat()


class DiffFormatter(BaseFormatter):
    KEY_DESCRIPTIONS = {
        "path": "archived file path",
        "change": "all available changes",
        "content": "file content change",
        "mode": "file mode change",
        "type": "file type change",
        "owner": "file owner (user/group) change",
        "user": "file user change",
        "group": "file group change",
        "link": "file link change",
        "directory": "file directory change",
        "blkdev": "file block device change",
        "chrdev": "file character device change",
        "fifo": "file fifo change",
        "mtime": "file modification time change",
        "ctime": "file change time change",
        "isomtime": "file modification time change (ISO 8601)",
        "isoctime": "file creation time change (ISO 8601)",
    }
    KEY_GROUPS = (
        ("path", "change"),
        ("content", "mode", "type", "owner", "group", "user"),
        ("link", "directory", "blkdev", "chrdev", "fifo"),
        ("mtime", "ctime", "isomtime", "isoctime"),
    )
    METADATA = ("mode", "type", "owner", "group", "user", "mtime", "ctime")

    def __init__(self, format, content_only=False):
        static_data = {}
        static_data.update(self.FIXED_KEYS)
        super().__init__(format or "{content}{link}{directory}{blkdev}{chrdev}{fifo} {path}{NL}", static_data)
        self.content_only = content_only
        self.format_keys = {f[1] for f in Formatter().parse(format)}
        self.call_keys = {
            "content": self.format_content,
            "mode": self.format_mode,
            "type": partial(self.format_mode, filetype=True),
            "owner": partial(self.format_owner),
            "group": partial(self.format_owner, spec="group"),
            "user": partial(self.format_owner, spec="user"),
            "link": partial(self.format_other, "link"),
            "directory": partial(self.format_other, "directory"),
            "blkdev": partial(self.format_other, "blkdev"),
            "chrdev": partial(self.format_other, "chrdev"),
            "fifo": partial(self.format_other, "fifo"),
            "mtime": partial(self.format_time, "mtime"),
            "ctime": partial(self.format_time, "ctime"),
            "isomtime": partial(self.format_iso_time, "mtime"),
            "isoctime": partial(self.format_iso_time, "ctime"),
        }
        self.used_call_keys = set(self.call_keys) & self.format_keys
        if self.content_only:
            self.used_call_keys -= set(self.METADATA)

    def get_item_data(self, item: "ItemDiff", jsonline=False) -> dict:
        diff_data = {}
        for key in self.used_call_keys:
            diff_data[key] = self.call_keys[key](item)

        change = []
        for key in self.call_keys:
            if key in ("isomtime", "isoctime"):
                continue
            if self.content_only and key in self.METADATA:
                continue
            change.append(self.call_keys[key](item))
        diff_data["change"] = " ".join([v for v in change if v])
        diff_data["path"] = item.path
        diff_data.update({} if jsonline else self.static_data)
        return diff_data

    def format_other(self, key, diff: "ItemDiff"):
        change = diff.changes().get(key)
        return f"{change.diff_type}".ljust(27) if change else ""  # 27 is the length of the content change

    def format_mode(self, diff: "ItemDiff", filetype=False):
        change = diff.type() if filetype else diff.mode()
        return f"[{change.diff_data['item1']} -> {change.diff_data['item2']}]" if change else ""

    def format_owner(self, diff: "ItemDiff", spec: Literal["owner", "user", "group"] = "owner"):
        if spec == "user":
            change = diff.user()
            return f"[{change.diff_data['item1']} -> {change.diff_data['item2']}]" if change else ""
        if spec == "group":
            change = diff.group()
            return f"[{change.diff_data['item1']} -> {change.diff_data['item2']}]" if change else ""
        if spec != "owner":
            raise ValueError(f"Invalid owner spec: {spec}")
        change = diff.owner()
        if change:
            return "[{}:{} -> {}:{}]".format(
                change.diff_data["item1"][0],
                change.diff_data["item1"][1],
                change.diff_data["item2"][0],
                change.diff_data["item2"][1],
            )
        return ""

    def format_content(self, diff: "ItemDiff"):
        change = diff.content()
        if change:
            if change.diff_type == "added":
                return "{}: {:>20}".format(change.diff_type, format_file_size(change.diff_data["added"]))
            if change.diff_type == "removed":
                return "{}: {:>18}".format(change.diff_type, format_file_size(change.diff_data["removed"]))
            if "added" not in change.diff_data and "removed" not in change.diff_data:
                return "modified: (can't get size)"
            return "{}: {:>8} {:>8}".format(
                change.diff_type,
                format_file_size(change.diff_data["added"], precision=1, sign=True),
                format_file_size(-change.diff_data["removed"], precision=1, sign=True),
            )
        return ""

    def format_time(self, key, diff: "ItemDiff"):
        change = diff.changes().get(key)
        return f"[{key}: {change.diff_data['item1']} -> {change.diff_data['item2']}]" if change else ""

    def format_iso_time(self, key, diff: "ItemDiff"):
        change = diff.changes().get(key)
        return (
            f"[{key}: {change.diff_data['item1'].isoformat()} -> {change.diff_data['item2'].isoformat()}]"
            if change
            else ""
        )


def file_status(mode):
    if stat.S_ISREG(mode):
        return "A"
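
For orientation, the DiffFormatter can also be driven directly; a hedged sketch, assuming item_diff is an ItemDiff obtained from Archive.compare_archives_iter():

    formatter = DiffFormatter("{mtime}{ctime} {path}{NL}")   # an empty format string falls back to the content-only default
    if not item_diff.equal():
        sys.stdout.write(formatter.format_item(item_diff))   # e.g. "[mtime: <old> -> <new>] input/test_file\n"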
@@ -1,4 +1,4 @@
from typing import FrozenSet, Set, NamedTuple, Tuple, Mapping, Dict, List, Iterator, Callable, Any
from typing import FrozenSet, Set, NamedTuple, Tuple, Mapping, Dict, List, Iterator, Callable, Any, Optional

from .helpers import StableDict

@@ -247,9 +247,36 @@ class ManifestItem(PropDict):
    @item_keys.setter
    def item_keys(self, val: Tuple) -> None: ...

class DiffChange:
    diff_type: str
    diff_data: Dict[str, Any]
    def __init__(self, diff_type: str, diff_data: Optional[Dict[str, Any]] = ...) -> None: ...
    def to_dict(self) -> Dict[str, Any]: ...

class ItemDiff:
    def __init__(self, *args, **kwargs) -> None: ...
    def _chunk_content_equal(self, c1: Iterator, c2: Iterator) -> bool: ...
    path: str
    def __init__(
        self,
        path: str,
        item1: Item,
        item2: Item,
        chunk_1: Iterator,
        chunk_2: Iterator,
        numeric_ids: bool = ...,
        can_compare_chunk_ids: bool = ...,
    ) -> None: ...
    def changes(self) -> Dict[str, DiffChange]: ...
    def equal(self, content_only: bool = ...) -> bool: ...
    def content(self) -> Optional[DiffChange]: ...
    def ctime(self) -> Optional[DiffChange]: ...
    def mtime(self) -> Optional[DiffChange]: ...
    def mode(self) -> Optional[DiffChange]: ...
    def type(self) -> Optional[DiffChange]: ...
    def owner(self) -> Optional[DiffChange]: ...
    def user(self) -> Optional[DiffChange]: ...
    def group(self) -> Optional[DiffChange]: ...

def chunk_content_equal(chunks_a: Iterator, chunks_b: Iterator) -> bool: ...

class Key(PropDict):
    @property

@@ -620,66 +620,76 @@ cpdef _init_names():
_init_names()


class DiffChange:
    """
    Stores a change in a diff.

    The diff_type denotes the type of change, e.g. "added", "removed", "modified".
    The diff_data contains additional information about the change, e.g. the old and new mode.
    """
    def __init__(self, diff_type, diff_data=None):
        self.diff_type = diff_type
        self.diff_data = diff_data or {}

    def to_dict(self):
        return {"type": self.diff_type, **self.diff_data}


class ItemDiff:
    """
    Comparison of two items from different archives.

    The items may have different paths and still be considered equal (e.g. for renames).
    It does not include extended or time attributes in the comparison.
    """

    def __init__(self, item1, item2, chunk_iterator1, chunk_iterator2, numeric_ids=False, can_compare_chunk_ids=False, content_only=False):
    def __init__(self, path, item1, item2, chunk_1, chunk_2, numeric_ids=False, can_compare_chunk_ids=False):
        self.path = path
        self._item1 = item1
        self._item2 = item2
        self._content_only = content_only
        self._numeric_ids = numeric_ids
        self._can_compare_chunk_ids = can_compare_chunk_ids
        self.equal = self._equal(chunk_iterator1, chunk_iterator2)
        changes = []
        self._chunk_1 = chunk_1
        self._chunk_2 = chunk_2

        self._changes = {}

        if self._item1.is_link() or self._item2.is_link():
            changes.append(self._link_diff())
            self._link_diff()

        if 'chunks' in self._item1 and 'chunks' in self._item2:
            changes.append(self._content_diff())
            self._content_diff()

        if self._item1.is_dir() or self._item2.is_dir():
            changes.append(self._presence_diff('directory'))
            self._presence_diff('directory')

        if self._item1.is_blk() or self._item2.is_blk():
            changes.append(self._presence_diff('blkdev'))
            self._presence_diff('blkdev')

        if self._item1.is_chr() or self._item2.is_chr():
            changes.append(self._presence_diff('chrdev'))
            self._presence_diff('chrdev')

        if self._item1.is_fifo() or self._item2.is_fifo():
            changes.append(self._presence_diff('fifo'))
            self._presence_diff('fifo')

        if not self._content_only:
            if not (self._item1.get('deleted') or self._item2.get('deleted')):
                changes.append(self._owner_diff())
                changes.append(self._mode_diff())
                changes.extend(self._time_diffs())
        if not (self._item1.get('deleted') or self._item2.get('deleted')):
            self._owner_diff()
            self._mode_diff()
            self._time_diffs()

        # filter out empty changes
        self._changes = [ch for ch in changes if ch]

    def changes(self):
        return self._changes

    def __repr__(self):
        if self.equal:
            return 'equal'
        return ' '.join(str for d, str in self._changes)
        return (' '.join(self._changes.keys())) or 'equal'

    def _equal(self, chunk_iterator1, chunk_iterator2):
    def equal(self, content_only=False):
        # if both are deleted, there is nothing at path regardless of what was deleted
        if self._item1.get('deleted') and self._item2.get('deleted'):
            return True

        attr_list = ['deleted', 'target']

        if not self._content_only:
        if not content_only:
            attr_list += ['mode', 'ctime', 'mtime']
            attr_list += ['uid', 'gid'] if self._numeric_ids else ['user', 'group']
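
For orientation, DiffChange.to_dict() simply merges the change type into the data dict; with values matching the test expectations below, a hypothetical call gives:

    DiffChange("added", {"added": 2048, "removed": 0}).to_dict()
    # -> {'type': 'added', 'added': 2048, 'removed': 0}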
@@ -693,74 +703,107 @@ class ItemDiff:
            return False

        if 'chunks' in self._item1 and 'chunks' in self._item2:
            return self._content_equal(chunk_iterator1, chunk_iterator2)
            return self._content_equal()

        return True

    def _presence_diff(self, item_type):
        if not self._item1.get('deleted') and self._item2.get('deleted'):
            chg = 'removed ' + item_type
            return ({"type": chg}, chg)
            self._changes[item_type] = DiffChange(f"removed {item_type}")
            return True
        if self._item1.get('deleted') and not self._item2.get('deleted'):
            chg = 'added ' + item_type
            return ({"type": chg}, chg)
            self._changes[item_type] = DiffChange(f"added {item_type}")
            return True

    def _link_diff(self):
        pd = self._presence_diff('link')
        if pd is not None:
            return pd
        if self._presence_diff('link'):
            return True
        if 'target' in self._item1 and 'target' in self._item2 and self._item1.target != self._item2.target:
            return ({"type": 'changed link'}, 'changed link')
            self._changes['link'] = DiffChange('changed link')
            return True

    def _content_diff(self):
        if self._item1.get('deleted'):
            sz = self._item2.get_size()
            return ({"type": "added", "size": sz}, 'added {:>13}'.format(format_file_size(sz)))
            self._changes['content'] = DiffChange("added", {"added": sz, "removed": 0})
            return True
        if self._item2.get('deleted'):
            sz = self._item1.get_size()
            return ({"type": "removed", "size": sz}, 'removed {:>11}'.format(format_file_size(sz)))
            self._changes['content'] = DiffChange("removed", {"added": 0, "removed": sz})
            return True
        if not self._can_compare_chunk_ids:
            return ({"type": "modified"}, "modified")
            self._changes['content'] = DiffChange("modified")
            return True
        chunk_ids1 = {c.id for c in self._item1.chunks}
        chunk_ids2 = {c.id for c in self._item2.chunks}
        added_ids = chunk_ids2 - chunk_ids1
        removed_ids = chunk_ids1 - chunk_ids2
        added = self._item2.get_size(consider_ids=added_ids)
        removed = self._item1.get_size(consider_ids=removed_ids)
        return ({"type": "modified", "added": added, "removed": removed},
                '{:>9} {:>9}'.format(format_file_size(added, precision=1, sign=True),
                                     format_file_size(-removed, precision=1, sign=True)))
        self._changes['content'] = DiffChange("modified", {"added": added, "removed": removed})
        return True


    def _owner_diff(self):
        u_attr, g_attr = ('uid', 'gid') if self._numeric_ids else ('user', 'group')
        u1, g1 = self._item1.get(u_attr), self._item1.get(g_attr)
        u2, g2 = self._item2.get(u_attr), self._item2.get(g_attr)
        if (u1, g1) != (u2, g2):
            return ({"type": "owner", "old_user": u1, "old_group": g1, "new_user": u2, "new_group": g2},
                    '[{}:{} -> {}:{}]'.format(u1, g1, u2, g2))
        if (u1, g1) == (u2, g2):
            return False
        self._changes['owner'] = DiffChange("changed owner", {"item1": (u1, g1), "item2": (u2, g2)})
        if u1 != u2:
            self._changes['user'] = DiffChange("changed user", {"item1": u1, "item2": u2})
        if g1 != g2:
            self._changes['group'] = DiffChange("changed group", {"item1": g1, "item2": g2})
        return True

    def _mode_diff(self):
        if 'mode' in self._item1 and 'mode' in self._item2 and self._item1.mode != self._item2.mode:
            mode1 = stat.filemode(self._item1.mode)
            mode2 = stat.filemode(self._item2.mode)
            return ({"type": "mode", "old_mode": mode1, "new_mode": mode2}, '[{} -> {}]'.format(mode1, mode2))
            self._changes['mode'] = DiffChange("changed mode", {"item1": mode1, "item2": mode2})
            if mode1[0] != mode2[0]:
                self._changes['type'] = DiffChange("changed type", {"item1": mode1[0], "item2": mode2[0]})

    def _time_diffs(self):
        changes = []
        attrs = ["ctime", "mtime"]
        for attr in attrs:
            if attr in self._item1 and attr in self._item2 and self._item1.get(attr) != self._item2.get(attr):
                ts1 = OutputTimestamp(safe_timestamp(self._item1.get(attr)))
                ts2 = OutputTimestamp(safe_timestamp(self._item2.get(attr)))
                changes.append(({"type": attr, f"old_{attr}": ts1, f"new_{attr}": ts2}, '[{}: {} -> {}]'.format(attr, ts1, ts2)))
        return changes
                self._changes[attr] = DiffChange(attr, {"item1": ts1, "item2": ts2},)
        return True

    def _content_equal(self, chunk_iterator1, chunk_iterator2):
    def content(self):
        return self._changes.get('content')

    def ctime(self):
        return self._changes.get('ctime')

    def mtime(self):
        return self._changes.get('mtime')

    def mode(self):
        return self._changes.get('mode')

    def type(self):
        return self._changes.get('type')

    def owner(self):
        return self._changes.get('owner')

    def user(self):
        return self._changes.get('user')

    def group(self):
        return self._changes.get('group')

    def _content_equal(self):
        if self._can_compare_chunk_ids:
            return self._item1.chunks == self._item2.chunks
        if self._item1.get_size() != self._item2.get_size():
            return False
        return chunks_contents_equal(chunk_iterator1, chunk_iterator2)
        return chunks_contents_equal(self._chunk_1, self._chunk_2)


def chunks_contents_equal(chunks_a, chunks_b):
@@ -72,22 +72,20 @@ class ArchiverTestCase(ArchiverTestCaseBase):
        self.cmd(f"--repo={self.repository_location}", "create", "test1b", "input", "--chunker-params", "16,18,17,4095")

        def do_asserts(output, can_compare_ids, content_only=False):
            # File contents changed (deleted and replaced with a new file)
            change = "B" if can_compare_ids else "{:<19}".format("modified")
            lines = output.splitlines()
            lines: list = output.splitlines()
            assert "file_replaced" in output # added to debug #3494
            change = "modified.*B" if can_compare_ids else r"modified: \(can't get size\)"
            self.assert_line_exists(lines, f"{change}.*input/file_replaced")

            # File unchanged
            assert "input/file_unchanged" not in output

            # Directory replaced with a regular file
            if "BORG_TESTS_IGNORE_MODES" not in os.environ and not is_win32 and not content_only:
                self.assert_line_exists(lines, "drwxr-xr-x -> -rwxr-xr-x.*input/dir_replaced_with_file")
                self.assert_line_exists(lines, "[drwxr-xr-x -> -rwxr-xr-x].*input/dir_replaced_with_file")

            # Basic directory cases
            assert "added directory input/dir_added" in output
            assert "removed directory input/dir_removed" in output
            assert "added directory input/dir_added" in output
            assert "removed directory input/dir_removed" in output

            if are_symlinks_supported():
                # Basic symlink cases

@@ -96,8 +94,9 @@ class ArchiverTestCase(ArchiverTestCaseBase):
                self.assert_line_exists(lines, "removed link.*input/link_removed")

                # Symlink replacing or being replaced
                assert "input/dir_replaced_with_link" in output
                assert "input/link_replaced_by_file" in output
                if not content_only:
                    assert "input/dir_replaced_with_link" in output
                    assert "input/link_replaced_by_file" in output

                # Symlink target removed. Should not affect the symlink at all.
                assert "input/link_target_removed" not in output

@@ -105,7 +104,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
            # The inode has two links and the file contents changed. Borg
            # should notice the changes in both links. However, the symlink
            # pointing to the file is not changed.
            change = "0 B" if can_compare_ids else "{:<19}".format("modified")
            change = "modified.*0 B" if can_compare_ids else r"modified: \(can't get size\)"
            self.assert_line_exists(lines, f"{change}.*input/empty")
            if are_hardlinks_supported():
                self.assert_line_exists(lines, f"{change}.*input/hardlink_contents_changed")

@@ -114,18 +113,18 @@ class ArchiverTestCase(ArchiverTestCaseBase):

            # Added a new file and a hard link to it. Both links to the same
            # inode should appear as separate files.
            assert "added 2.05 kB input/file_added" in output
            assert "added: 2.05 kB input/file_added" in output
            if are_hardlinks_supported():
                assert "added 2.05 kB input/hardlink_added" in output
                assert "added: 2.05 kB input/hardlink_added" in output

            # check if a diff between nonexistent and empty new file is found
            assert "added 0 B input/file_empty_added" in output
            assert "added: 0 B input/file_empty_added" in output

            # The inode has two links and both of them are deleted. They should
            # appear as two deleted files.
            assert "removed 256 B input/file_removed" in output
            assert "removed: 256 B input/file_removed" in output
            if are_hardlinks_supported():
                assert "removed 256 B input/hardlink_removed" in output
                assert "removed: 256 B input/hardlink_removed" in output

            if are_hardlinks_supported() and content_only:
                # Another link (marked previously as the source in borg) to the

@@ -143,7 +142,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
            chgsets = [j["changes"] for j in data if j["path"] == filename]
            assert len(chgsets) < 2
            # return a flattened list of changes for given filename
            return [chg for chgset in chgsets for chg in chgset]
            return sum(chgsets, [])

        # convert output to list of dicts
        joutput = [json.loads(line) for line in output.split("\n") if line]

@@ -157,7 +156,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):

        # Directory replaced with a regular file
        if "BORG_TESTS_IGNORE_MODES" not in os.environ and not is_win32 and not content_only:
            assert {"type": "mode", "old_mode": "drwxr-xr-x", "new_mode": "-rwxr-xr-x"} in get_changes(
            assert {"type": "changed mode", "item1": "drwxr-xr-x", "item2": "-rwxr-xr-x"} in get_changes(
                "input/dir_replaced_with_file", joutput
            )

@@ -175,11 +174,11 @@ class ArchiverTestCase(ArchiverTestCaseBase):

        if not content_only:
            assert any(
                chg["type"] == "mode" and chg["new_mode"].startswith("l")
                chg["type"] == "changed mode" and chg["item1"].startswith("d") and chg["item2"].startswith("l")
                for chg in get_changes("input/dir_replaced_with_link", joutput)
            ), get_changes("input/dir_replaced_with_link", joutput)
            assert any(
                chg["type"] == "mode" and chg["old_mode"].startswith("l")
                chg["type"] == "changed mode" and chg["item1"].startswith("l") and chg["item2"].startswith("-")
                for chg in get_changes("input/link_replaced_by_file", joutput)
            ), get_changes("input/link_replaced_by_file", joutput)

@@ -198,18 +197,18 @@ class ArchiverTestCase(ArchiverTestCaseBase):

        # Added a new file and a hard link to it. Both links to the same
        # inode should appear as separate files.
        assert {"type": "added", "size": 2048} in get_changes("input/file_added", joutput)
        assert {"added": 2048, "removed": 0, "type": "added"} in get_changes("input/file_added", joutput)
        if are_hardlinks_supported():
            assert {"type": "added", "size": 2048} in get_changes("input/hardlink_added", joutput)
            assert {"added": 2048, "removed": 0, "type": "added"} in get_changes("input/hardlink_added", joutput)

        # check if a diff between nonexistent and empty new file is found
        assert {"type": "added", "size": 0} in get_changes("input/file_empty_added", joutput)
        assert {"added": 0, "removed": 0, "type": "added"} in get_changes("input/file_empty_added", joutput)

        # The inode has two links and both of them are deleted. They should
        # appear as two deleted files.
        assert {"type": "removed", "size": 256} in get_changes("input/file_removed", joutput)
        assert {"added": 0, "removed": 256, "type": "removed"} in get_changes("input/file_removed", joutput)
        if are_hardlinks_supported():
            assert {"type": "removed", "size": 256} in get_changes("input/hardlink_removed", joutput)
            assert {"added": 0, "removed": 256, "type": "removed"} in get_changes("input/hardlink_removed", joutput)

        if are_hardlinks_supported() and content_only:
            # Another link (marked previously as the source in borg) to the

@@ -251,14 +250,28 @@ class ArchiverTestCase(ArchiverTestCaseBase):
            time.sleep(1) # HFS has a 1s timestamp granularity
        self.create_regular_file("test_file", size=15)
        self.cmd(f"--repo={self.repository_location}", "create", "archive2", "input")
        output = self.cmd(f"--repo={self.repository_location}", "diff", "archive1", "archive2")
        output = self.cmd(
            f"--repo={self.repository_location}",
            "diff",
            "archive1",
            "archive2",
            "--format",
            "'{mtime}{ctime} {path}{NL}'",
        )
        self.assert_in("mtime", output)
        self.assert_in("ctime", output)  # Should show up on windows as well since it is a new file.
        if is_darwin:
            time.sleep(1) # HFS has a 1s timestamp granularity
        os.chmod("input/test_file", 0o777)
        self.cmd(f"--repo={self.repository_location}", "create", "archive3", "input")
        output = self.cmd(f"--repo={self.repository_location}", "diff", "archive2", "archive3")
        output = self.cmd(
            f"--repo={self.repository_location}",
            "diff",
            "archive2",
            "archive3",
            "--format",
            "'{mtime}{ctime} {path}{NL}'",
        )
        self.assert_not_in("mtime", output)
        # Checking platform because ctime should not be shown on windows since it wasn't recreated.
        if not is_win32:

@@ -294,7 +307,10 @@ class ArchiverTestCase(ArchiverTestCaseBase):
            "e_file_changed",
            "f_file_removed",
        ]
        assert all(x in line for x, line in zip(expected, output.splitlines()))
        assert isinstance(output, str)
        outputs = output.splitlines()
        assert len(outputs) == len(expected)
        assert all(x in line for x, line in zip(expected, outputs))


class RemoteArchiverTestCase(RemoteArchiverTestCaseBase, ArchiverTestCase):