1
0
Fork 0
mirror of https://github.com/borgbackup/borg.git synced 2025-03-10 06:03:38 +00:00

Merge pull request #7414 from Michael-Girma/backport/1.2-maint-7248

Show ctime and mtime on borg diff, fixes #7248 (Backport of #7335)
This commit is contained in:
TW 2023-03-20 14:14:30 +01:00 committed by GitHub
commit 2ab678ca93
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 116 additions and 51 deletions

View file

@ -1044,7 +1044,7 @@ Utilization of max. archive size: {csize_max:.0%}
logger.warning('borg check --repair is required to free all space.') logger.warning('borg check --repair is required to free all space.')
@staticmethod @staticmethod
def compare_archives_iter(archive1, archive2, matcher=None, can_compare_chunk_ids=False): def compare_archives_iter(archive1, archive2, matcher=None, can_compare_chunk_ids=False, content_only=False):
""" """
Yields tuples with a path and an ItemDiff instance describing changes/indicating equality. Yields tuples with a path and an ItemDiff instance describing changes/indicating equality.
@ -1073,7 +1073,8 @@ Utilization of max. archive size: {csize_max:.0%}
return ItemDiff(item1, item2, return ItemDiff(item1, item2,
archive1.pipeline.fetch_many([c.id for c in item1.get('chunks', [])]), archive1.pipeline.fetch_many([c.id for c in item1.get('chunks', [])]),
archive2.pipeline.fetch_many([c.id for c in item2.get('chunks', [])]), archive2.pipeline.fetch_many([c.id for c in item2.get('chunks', [])]),
can_compare_chunk_ids=can_compare_chunk_ids) can_compare_chunk_ids=can_compare_chunk_ids,
content_only=content_only)
def defer_if_necessary(item1, item2): def defer_if_necessary(item1, item2):
"""Adds item tuple to deferred if necessary and returns True, if items were deferred""" """Adds item tuple to deferred if necessary and returns True, if items were deferred"""
@ -1124,7 +1125,8 @@ Utilization of max. archive size: {csize_max:.0%}
for item1, item2 in deferred: for item1, item2 in deferred:
assert hardlink_master_seen(item1) assert hardlink_master_seen(item1)
assert hardlink_master_seen(item2) assert hardlink_master_seen(item2)
yield (path, compare_items(item1, item2)) assert item1.path == item2.path, "Deferred items have different paths"
yield (item1.path, compare_items(item1, item2))
class MetadataCollector: class MetadataCollector:

View file

@ -75,6 +75,7 @@ try:
from .helpers import sig_int, ignore_sigint from .helpers import sig_int, ignore_sigint
from .helpers import iter_separated from .helpers import iter_separated
from .helpers import get_tar_filter from .helpers import get_tar_filter
from .helpers.parseformat import BorgJsonEncoder
from .nanorst import rst_to_terminal from .nanorst import rst_to_terminal
from .patterns import ArgparsePatternAction, ArgparseExcludeFileAction, ArgparsePatternFileAction, parse_exclude_pattern from .patterns import ArgparsePatternAction, ArgparseExcludeFileAction, ArgparsePatternFileAction, parse_exclude_pattern
from .patterns import PatternMatcher from .patterns import PatternMatcher
@ -1116,7 +1117,7 @@ class Archiver:
"""Diff contents of two archives""" """Diff contents of two archives"""
def print_json_output(diff, path): def print_json_output(diff, path):
print(json.dumps({"path": path, "changes": [j for j, str in diff]})) print(json.dumps({"path": path, "changes": [j for j, str in diff]}, sort_keys=True, cls=BorgJsonEncoder))
def print_text_output(diff, path): def print_text_output(diff, path):
print("{:<19} {}".format(' '.join([str for j, str in diff]), path)) print("{:<19} {}".format(' '.join([str for j, str in diff]), path))
@ -1145,7 +1146,7 @@ class Archiver:
matcher = self.build_matcher(args.patterns, args.paths) matcher = self.build_matcher(args.patterns, args.paths)
diffs = Archive.compare_archives_iter(archive1, archive2, matcher, can_compare_chunk_ids=can_compare_chunk_ids) diffs = Archive.compare_archives_iter(archive1, archive2, matcher, can_compare_chunk_ids=can_compare_chunk_ids, content_only=args.content_only)
# Conversion to string and filtering for diff.equal to save memory if sorting # Conversion to string and filtering for diff.equal to save memory if sorting
diffs = ((path, diff.changes()) for path, diff in diffs if not diff.equal) diffs = ((path, diff.changes()) for path, diff in diffs if not diff.equal)
@ -3939,6 +3940,11 @@ class Archiver:
help='Override check of chunker parameters.') help='Override check of chunker parameters.')
subparser.add_argument('--sort', dest='sort', action='store_true', subparser.add_argument('--sort', dest='sort', action='store_true',
help='Sort the output lines by file path.') help='Sort the output lines by file path.')
subparser.add_argument(
"--content-only",
action="store_true",
help="Only compare differences in content (exclude metadata differences)",
)
subparser.add_argument('--json-lines', action='store_true', subparser.add_argument('--json-lines', action='store_true',
help='Format output as JSON Lines. ') help='Format output as JSON Lines. ')
subparser.add_argument('location', metavar='REPO::ARCHIVE1', subparser.add_argument('location', metavar='REPO::ARCHIVE1',

View file

@ -648,7 +648,7 @@ class ArchiveFormatter(BaseFormatter):
self.item_data = static_keys self.item_data = static_keys
def format_item_json(self, item): def format_item_json(self, item):
return json.dumps(self.get_item_data(item), cls=BorgJsonEncoder) + '\n' return json.dumps(self.get_item_data(item), cls=BorgJsonEncoder, sort_keys=True) + '\n'
def get_item_data(self, archive_info): def get_item_data(self, archive_info):
self.name = archive_info.name self.name = archive_info.name

View file

@ -6,6 +6,7 @@ from .helpers import safe_encode, safe_decode
from .helpers import bigint_to_int, int_to_bigint from .helpers import bigint_to_int, int_to_bigint
from .helpers import StableDict from .helpers import StableDict
from .helpers import format_file_size from .helpers import format_file_size
from .helpers.time import OutputTimestamp, safe_timestamp
cdef extern from "_item.c": cdef extern from "_item.c":
object _object_to_optr(object obj) object _object_to_optr(object obj)
@ -421,9 +422,10 @@ class ItemDiff:
It does not include extended or time attributes in the comparison. It does not include extended or time attributes in the comparison.
""" """
def __init__(self, item1, item2, chunk_iterator1, chunk_iterator2, numeric_ids=False, can_compare_chunk_ids=False): def __init__(self, item1, item2, chunk_iterator1, chunk_iterator2, numeric_ids=False, can_compare_chunk_ids=False, content_only=False):
self._item1 = item1 self._item1 = item1
self._item2 = item2 self._item2 = item2
self._content_only = content_only
self._numeric_ids = numeric_ids self._numeric_ids = numeric_ids
self._can_compare_chunk_ids = can_compare_chunk_ids self._can_compare_chunk_ids = can_compare_chunk_ids
self.equal = self._equal(chunk_iterator1, chunk_iterator2) self.equal = self._equal(chunk_iterator1, chunk_iterator2)
@ -447,9 +449,10 @@ class ItemDiff:
if self._item1.is_fifo() or self._item2.is_fifo(): if self._item1.is_fifo() or self._item2.is_fifo():
changes.append(self._presence_diff('fifo')) changes.append(self._presence_diff('fifo'))
if not (self._item1.get('deleted') or self._item2.get('deleted')): if not (self._item1.get('deleted') or self._item2.get('deleted')) and not self._content_only:
changes.append(self._owner_diff()) changes.append(self._owner_diff())
changes.append(self._mode_diff()) changes.append(self._mode_diff())
changes.extend(self._time_diffs())
# filter out empty changes # filter out empty changes
self._changes = [ch for ch in changes if ch] self._changes = [ch for ch in changes if ch]
@ -467,8 +470,11 @@ class ItemDiff:
if self._item1.get('deleted') and self._item2.get('deleted'): if self._item1.get('deleted') and self._item2.get('deleted'):
return True return True
attr_list = ['deleted', 'mode', 'source'] attr_list = ['deleted', 'source']
if not self._content_only:
attr_list += ['mode', 'ctime', 'mtime']
attr_list += ['uid', 'gid'] if self._numeric_ids else ['user', 'group'] attr_list += ['uid', 'gid'] if self._numeric_ids else ['user', 'group']
for attr in attr_list: for attr in attr_list:
if self._item1.get(attr) != self._item2.get(attr): if self._item1.get(attr) != self._item2.get(attr):
return False return False
@ -531,6 +537,16 @@ class ItemDiff:
mode2 = stat.filemode(self._item2.mode) mode2 = stat.filemode(self._item2.mode)
return ({"type": "mode", "old_mode": mode1, "new_mode": mode2}, '[{} -> {}]'.format(mode1, mode2)) return ({"type": "mode", "old_mode": mode1, "new_mode": mode2}, '[{} -> {}]'.format(mode1, mode2))
def _time_diffs(self):
    """
    Collect the ctime/mtime differences between the two compared items.

    A timestamp attribute is only reported when it is present on both items
    and its value differs. Each reported change is a ``(dict, str)`` tuple:
    the dict holds the attribute name under ``"type"`` plus the old and new
    timestamps, the string is the one-line text rendering of the same change.

    Returns a (possibly empty) list of such tuples, ctime before mtime.
    """
    first, second = self._item1, self._item2
    found = []
    for name in ("ctime", "mtime"):
        # Skip attributes missing on either side: there is nothing to compare.
        if not (name in first and name in second):
            continue
        before, after = first.get(name), second.get(name)
        if before != after:
            old_ts = OutputTimestamp(safe_timestamp(before))
            new_ts = OutputTimestamp(safe_timestamp(after))
            as_json = {"type": name, f"old_{name}": old_ts, f"new_{name}": new_ts}
            as_text = '[{}: {} -> {}]'.format(name, old_ts, new_ts)
            found.append((as_json, as_text))
    return found
def _content_equal(self, chunk_iterator1, chunk_iterator2): def _content_equal(self, chunk_iterator1, chunk_iterator2):
if self._can_compare_chunk_ids: if self._can_compare_chunk_ids:
return self._item1.chunks == self._item2.chunks return self._item1.chunks == self._item2.chunks

View file

@ -7,6 +7,7 @@ try:
except ImportError: except ImportError:
posix = None posix = None
import re
import stat import stat
import sys import sys
import sysconfig import sysconfig
@ -187,6 +188,9 @@ class BaseTestCase(unittest.TestCase):
diff = filecmp.dircmp(dir1, dir2) diff = filecmp.dircmp(dir1, dir2)
self._assert_dirs_equal_cmp(diff, **kwargs) self._assert_dirs_equal_cmp(diff, **kwargs)
def assert_line_exists(self, lines, expected_regexpr):
    """
    Assert that at least one entry of *lines* matches *expected_regexpr*.

    The pattern is applied with ``re.search``, so it may match anywhere
    within a line. Fails with an AssertionError naming the pattern and the
    lines when nothing matches.
    """
    matched = False
    for candidate in lines:
        if re.search(expected_regexpr, candidate):
            matched = True
            break
    assert matched, f"no match for {expected_regexpr} in {lines}"
def _assert_dirs_equal_cmp(self, diff, ignore_flags=False, ignore_xattrs=False, ignore_ns=False): def _assert_dirs_equal_cmp(self, diff, ignore_flags=False, ignore_xattrs=False, ignore_ns=False):
self.assert_equal(diff.left_only, []) self.assert_equal(diff.left_only, [])
self.assert_equal(diff.right_only, []) self.assert_equal(diff.right_only, [])

View file

@ -56,7 +56,7 @@ from ..repository import Repository
from . import has_lchflags, llfuse from . import has_lchflags, llfuse
from . import BaseTestCase, changedir, environment_variable, no_selinux, same_ts_ns from . import BaseTestCase, changedir, environment_variable, no_selinux, same_ts_ns
from . import are_symlinks_supported, are_hardlinks_supported, are_fifos_supported, is_utime_fully_supported, is_birthtime_fully_supported from . import are_symlinks_supported, are_hardlinks_supported, are_fifos_supported, is_utime_fully_supported, is_birthtime_fully_supported
from .platform import fakeroot_detected, is_darwin from .platform import fakeroot_detected, is_darwin, is_win32
from .upgrader import make_attic_repo from .upgrader import make_attic_repo
from . import key from . import key
@ -4263,18 +4263,19 @@ class DiffArchiverTestCase(ArchiverTestCaseBase):
self.cmd('create', self.repository_location + '::test1a', 'input') self.cmd('create', self.repository_location + '::test1a', 'input')
self.cmd('create', '--chunker-params', '16,18,17,4095', self.repository_location + '::test1b', 'input') self.cmd('create', '--chunker-params', '16,18,17,4095', self.repository_location + '::test1b', 'input')
def do_asserts(output, can_compare_ids): def do_asserts(output, can_compare_ids, content_only=False):
# File contents changed (deleted and replaced with a new file) # File contents changed (deleted and replaced with a new file)
change = 'B' if can_compare_ids else '{:<19}'.format('modified') change = 'B' if can_compare_ids else '{:<19}'.format('modified')
lines = output.splitlines()
assert 'file_replaced' in output # added to debug #3494 assert 'file_replaced' in output # added to debug #3494
assert f'{change} input/file_replaced' in output self.assert_line_exists(lines, f"{change}.*input/file_replaced")
# File unchanged # File unchanged
assert 'input/file_unchanged' not in output assert 'input/file_unchanged' not in output
# Directory replaced with a regular file # Directory replaced with a regular file
if 'BORG_TESTS_IGNORE_MODES' not in os.environ: if "BORG_TESTS_IGNORE_MODES" not in os.environ and not is_win32 and not content_only:
assert '[drwxr-xr-x -> -rwxr-xr-x] input/dir_replaced_with_file' in output self.assert_line_exists(lines, "drwxr-xr-x -> -rwxr-xr-x.*input/dir_replaced_with_file")
# Basic directory cases # Basic directory cases
assert 'added directory input/dir_added' in output assert 'added directory input/dir_added' in output
@ -4282,13 +4283,13 @@ class DiffArchiverTestCase(ArchiverTestCaseBase):
if are_symlinks_supported(): if are_symlinks_supported():
# Basic symlink cases # Basic symlink cases
assert 'changed link input/link_changed' in output self.assert_line_exists(lines, "changed link.*input/link_changed")
assert 'added link input/link_added' in output self.assert_line_exists(lines, "added link.*input/link_added")
assert 'removed link input/link_removed' in output self.assert_line_exists(lines, "removed link.*input/link_removed")
# Symlink replacing or being replaced # Symlink replacing or being replaced
assert '] input/dir_replaced_with_link' in output assert 'input/dir_replaced_with_link' in output
assert '] input/link_replaced_by_file' in output assert 'input/link_replaced_by_file' in output
# Symlink target removed. Should not affect the symlink at all. # Symlink target removed. Should not affect the symlink at all.
assert 'input/link_target_removed' not in output assert 'input/link_target_removed' not in output
@ -4297,9 +4298,10 @@ class DiffArchiverTestCase(ArchiverTestCaseBase):
# should notice the changes in both links. However, the symlink # should notice the changes in both links. However, the symlink
# pointing to the file is not changed. # pointing to the file is not changed.
change = '0 B' if can_compare_ids else '{:<19}'.format('modified') change = '0 B' if can_compare_ids else '{:<19}'.format('modified')
assert f'{change} input/empty' in output self.assert_line_exists(lines, f"{change}.*input/empty")
if are_hardlinks_supported(): if are_hardlinks_supported():
assert f'{change} input/hardlink_contents_changed' in output self.assert_line_exists(lines, f"{change}.*input/hardlink_contents_changed")
if are_symlinks_supported(): if are_symlinks_supported():
assert 'input/link_target_contents_changed' not in output assert 'input/link_target_contents_changed' not in output
@ -4318,18 +4320,18 @@ class DiffArchiverTestCase(ArchiverTestCaseBase):
if are_hardlinks_supported(): if are_hardlinks_supported():
assert 'removed 256 B input/hardlink_removed' in output assert 'removed 256 B input/hardlink_removed' in output
if are_hardlinks_supported() and content_only:
# Another link (marked previously as the source in borg) to the # Another link (marked previously as the source in borg) to the
# same inode was removed. This should not change this link at all. # same inode was removed. This should only change the ctime since removing
if are_hardlinks_supported(): # the link would result in the decrementation of the inode's hard-link count.
assert 'input/hardlink_target_removed' not in output assert "input/hardlink_target_removed" not in output
# Another link (marked previously as the source in borg) to the # Another link (marked previously as the source in borg) to the
# same inode was replaced with a new regular file. This should not # same inode was replaced with a new regular file. This should only change
# change this link at all. # its ctime. This should not be reflected in the output if content-only is set
if are_hardlinks_supported(): assert "input/hardlink_target_replaced" not in output
assert 'input/hardlink_target_replaced' not in output
def do_json_asserts(output, can_compare_ids): def do_json_asserts(output, can_compare_ids, content_only=False):
def get_changes(filename, data): def get_changes(filename, data):
chgsets = [j['changes'] for j in data if j['path'] == filename] chgsets = [j['changes'] for j in data if j['path'] == filename]
assert len(chgsets) < 2 assert len(chgsets) < 2
@ -4347,7 +4349,7 @@ class DiffArchiverTestCase(ArchiverTestCaseBase):
assert not any(get_changes('input/file_unchanged', joutput)) assert not any(get_changes('input/file_unchanged', joutput))
# Directory replaced with a regular file # Directory replaced with a regular file
if 'BORG_TESTS_IGNORE_MODES' not in os.environ: if 'BORG_TESTS_IGNORE_MODES' not in os.environ and not content_only:
assert {'type': 'mode', 'old_mode': 'drwxr-xr-x', 'new_mode': '-rwxr-xr-x'} in \ assert {'type': 'mode', 'old_mode': 'drwxr-xr-x', 'new_mode': '-rwxr-xr-x'} in \
get_changes('input/dir_replaced_with_file', joutput) get_changes('input/dir_replaced_with_file', joutput)
@ -4362,10 +4364,15 @@ class DiffArchiverTestCase(ArchiverTestCaseBase):
assert {'type': 'removed link'} in get_changes('input/link_removed', joutput) assert {'type': 'removed link'} in get_changes('input/link_removed', joutput)
# Symlink replacing or being replaced # Symlink replacing or being replaced
assert any(chg['type'] == 'mode' and chg['new_mode'].startswith('l') for chg in if not content_only:
get_changes('input/dir_replaced_with_link', joutput)) assert any(
assert any(chg['type'] == 'mode' and chg['old_mode'].startswith('l') for chg in chg["type"] == "mode" and chg["new_mode"].startswith("l")
get_changes('input/link_replaced_by_file', joutput)) for chg in get_changes("input/dir_replaced_with_link", joutput)
), get_changes("input/dir_replaced_with_link", joutput)
assert any(
chg["type"] == "mode" and chg["old_mode"].startswith("l")
for chg in get_changes("input/link_replaced_by_file", joutput)
), get_changes("input/link_replaced_by_file", joutput)
# Symlink target removed. Should not affect the symlink at all. # Symlink target removed. Should not affect the symlink at all.
assert not any(get_changes('input/link_target_removed', joutput)) assert not any(get_changes('input/link_target_removed', joutput))
@ -4395,21 +4402,27 @@ class DiffArchiverTestCase(ArchiverTestCaseBase):
if are_hardlinks_supported(): if are_hardlinks_supported():
assert {'type': 'removed', 'size': 256} in get_changes('input/hardlink_removed', joutput) assert {'type': 'removed', 'size': 256} in get_changes('input/hardlink_removed', joutput)
if are_hardlinks_supported() and content_only:
# Another link (marked previously as the source in borg) to the # Another link (marked previously as the source in borg) to the
# same inode was removed. This should not change this link at all. # same inode was removed. This should only change the ctime since removing
if are_hardlinks_supported(): # the link would result in the decrementation of the inode's hard-link count.
assert not any(get_changes('input/hardlink_target_removed', joutput)) assert not any(get_changes("input/hardlink_target_removed", joutput))
# Another link (marked previously as the source in borg) to the # Another link (marked previously as the source in borg) to the
# same inode was replaced with a new regular file. This should not # same inode was replaced with a new regular file. This should only change
# change this link at all. # its ctime. This should not be reflected in the output if content-only is set
if are_hardlinks_supported(): assert not any(get_changes("input/hardlink_target_replaced", joutput))
assert not any(get_changes('input/hardlink_target_replaced', joutput))
do_asserts(self.cmd('diff', self.repository_location + '::test0', 'test1a'), True) output = self.cmd("diff", self.repository_location + "::test0", "test1a")
# We expect exit_code=1 due to the chunker params warning do_asserts(output, True)
do_asserts(self.cmd('diff', self.repository_location + '::test0', 'test1b', exit_code=1), False) output = self.cmd("diff", self.repository_location + "::test0", "test1b", "--content-only", exit_code=1)
do_json_asserts(self.cmd('diff', self.repository_location + '::test0', 'test1a', '--json-lines'), True) do_asserts(output, False, content_only=True)
output = self.cmd("diff", self.repository_location + "::test0", "test1a", "--json-lines")
do_json_asserts(output, True)
output = self.cmd("diff", self.repository_location + "::test0", "test1a", "--json-lines", "--content-only")
do_json_asserts(output, True, content_only=True)
def test_sort_option(self): def test_sort_option(self):
self.cmd('init', '--encryption=repokey', self.repository_location) self.cmd('init', '--encryption=repokey', self.repository_location)
@ -4430,7 +4443,7 @@ class DiffArchiverTestCase(ArchiverTestCaseBase):
self.create_regular_file('d_file_added', size=256) self.create_regular_file('d_file_added', size=256)
self.cmd('create', self.repository_location + '::test1', 'input') self.cmd('create', self.repository_location + '::test1', 'input')
output = self.cmd('diff', '--sort', self.repository_location + '::test0', 'test1') output = self.cmd('diff', '--sort', self.repository_location + '::test0', 'test1', '--content-only')
expected = [ expected = [
'a_file_removed', 'a_file_removed',
'b_file_added', 'b_file_added',
@ -4442,6 +4455,30 @@ class DiffArchiverTestCase(ArchiverTestCaseBase):
assert all(x in line for x, line in zip(expected, output.splitlines())) assert all(x in line for x, line in zip(expected, output.splitlines()))
def test_time_diffs(self):
    """
    Check that ``borg diff`` reports ctime/mtime changes between archives.

    Scenario 1: a file is deleted and recreated with different content
    between archive1 and archive2; both "mtime" and "ctime" must appear in
    the diff output.

    Scenario 2: only the file's permissions are changed between archive2 and
    archive3; "mtime" must not appear, and "ctime" must appear everywhere
    except on Windows.
    """
    self.cmd('init', '--encryption=repokey', self.repository_location)
    self.create_regular_file("test_file", size=10)
    self.cmd('create', self.repository_location + '::archive1', 'input')
    # Small pause so the recreated file gets timestamps distinct from the first one.
    time.sleep(0.1)
    os.unlink("input/test_file")
    if is_win32:
        # Sleeping for 15s because Windows doesn't refresh ctime if file is deleted and recreated within 15 seconds.
        time.sleep(15)
    self.create_regular_file("test_file", size=15)
    self.cmd('create', self.repository_location + '::archive2', 'input')
    output = self.cmd("diff", self.repository_location + "::archive1", "archive2")
    self.assert_in("mtime", output)
    self.assert_in("ctime", output)  # Should show up on windows as well since it is a new file.
    # The mode must be an octal literal: decimal 777 is 0o1411 (sticky bit,
    # r-- --x --x), not the rwxrwxrwx permission set that is meant here.
    os.chmod("input/test_file", 0o777)
    self.cmd('create', self.repository_location + '::archive3', 'input')
    output = self.cmd("diff", self.repository_location + "::archive2", "archive3")
    self.assert_not_in("mtime", output)
    # Checking platform because ctime should not be shown on windows since it wasn't recreated.
    if not is_win32:
        self.assert_in("ctime", output)
    else:
        self.assert_not_in("ctime", output)
def test_get_args(): def test_get_args():
archiver = Archiver() archiver = Archiver()