mirror of
https://github.com/borgbackup/borg.git
synced 2025-02-22 22:22:27 +00:00
Improve 'borg diff' output format
The main design goals of the new format: - One file takes exactly one line of output - The format is easy to read with typical, long list of changes - Metadata changes are visible and easy to examine - Unuseful information is not shown Resolves #757.
This commit is contained in:
parent
c405122753
commit
1d3e69e4c7
3 changed files with 196 additions and 83 deletions
147
borg/archiver.py
147
borg/archiver.py
|
@ -433,86 +433,139 @@ def item_is_hardlink_master(item):
|
|||
@with_archive
|
||||
def do_diff(self, args, repository, manifest, key, archive):
|
||||
"""Diff contents of two archives"""
|
||||
def format_bytes(count):
|
||||
if count is None:
|
||||
return "<deleted>"
|
||||
return format_file_size(count)
|
||||
|
||||
def fetch_and_compare_chunks(chunk_ids1, chunk_ids2, archive1, archive2):
|
||||
chunks1 = archive1.pipeline.fetch_many(chunk_ids1)
|
||||
chunks2 = archive2.pipeline.fetch_many(chunk_ids2)
|
||||
return self.compare_chunk_contents(chunks1, chunks2)
|
||||
|
||||
def sum_chunk_size(item):
|
||||
if item.get(b'deleted'):
|
||||
return None
|
||||
else:
|
||||
return sum(c[1] for c in item[b'chunks'])
|
||||
|
||||
def get_owner(item):
|
||||
if args.numeric_owner:
|
||||
return item[b'uid'], item[b'gid']
|
||||
else:
|
||||
return item[b'user'], item[b'group']
|
||||
|
||||
def compare_items(path, item1, item2, deleted=False):
|
||||
def get_mode(item):
|
||||
if b'mode' in item:
|
||||
return stat.filemode(item[b'mode'])
|
||||
else:
|
||||
return [None]
|
||||
|
||||
def has_hardlink_master(item, hardlink_masters):
|
||||
return item.get(b'source') in hardlink_masters and get_mode(item)[0] != 'l'
|
||||
|
||||
def compare_link(item1, item2):
|
||||
# These are the simple link cases. For special cases, e.g. if a
|
||||
# regular file is replaced with a link or vice versa, it is
|
||||
# indicated in compare_mode instead.
|
||||
if item1.get(b'deleted'):
|
||||
return 'added link'
|
||||
elif item2.get(b'deleted'):
|
||||
return 'removed link'
|
||||
elif item1[b'source'] != item2[b'source']:
|
||||
return 'changed link'
|
||||
|
||||
def contents_changed(item1, item2):
|
||||
if can_compare_chunk_ids:
|
||||
return item1[b'chunks'] != item2[b'chunks']
|
||||
else:
|
||||
if sum_chunk_size(item1) != sum_chunk_size(item2):
|
||||
return True
|
||||
else:
|
||||
chunk_ids1 = [c[0] for c in item1[b'chunks']]
|
||||
chunk_ids2 = [c[0] for c in item2[b'chunks']]
|
||||
return not fetch_and_compare_chunks(chunk_ids1, chunk_ids2, archive1, archive2)
|
||||
|
||||
def compare_content(path, item1, item2):
|
||||
if contents_changed(item1, item2):
|
||||
if item1.get(b'deleted'):
|
||||
return ('added {:>13}'.format(format_file_size(sum_chunk_size(item2))))
|
||||
elif item2.get(b'deleted'):
|
||||
return ('removed {:>11}'.format(format_file_size(sum_chunk_size(item1))))
|
||||
else:
|
||||
chunk_id_set1 = {c[0] for c in item1[b'chunks']}
|
||||
chunk_id_set2 = {c[0] for c in item2[b'chunks']}
|
||||
added = sum(c[1] for c in (chunk_id_set2 - chunk_id_set1))
|
||||
removed = -sum(c[1] for c in (chunk_id_set1 - chunk_id_set2))
|
||||
|
||||
return ('{:>9} {:>9}'.format(format_file_size(added, precision=1, sign=True),
|
||||
format_file_size(removed, precision=1, sign=True)))
|
||||
|
||||
def compare_directory(item1, item2):
|
||||
if item2.get(b'deleted') and not item1.get(b'deleted'):
|
||||
return 'removed directory'
|
||||
elif item1.get(b'deleted') and not item2.get(b'deleted'):
|
||||
return 'added directory'
|
||||
|
||||
def compare_owner(item1, item2):
|
||||
user1, group1 = get_owner(item1)
|
||||
user2, group2 = get_owner(item2)
|
||||
if user1 != user2 or group1 != group2:
|
||||
return '[{}:{} -> {}:{}]'.format(user1, group1, user2, group2)
|
||||
|
||||
def compare_mode(item1, item2):
|
||||
if item1[b'mode'] != item2[b'mode']:
|
||||
return '[{} -> {}]'.format(get_mode(item1), get_mode(item2))
|
||||
|
||||
def compare_items(path, item1, item2, hardlink_masters, deleted=False):
|
||||
"""
|
||||
Compare two items with identical paths.
|
||||
:param deleted: Whether one of the items has been deleted
|
||||
"""
|
||||
changes = []
|
||||
|
||||
if item1.get(b'hardlink_master') or item2.get(b'hardlink_master'):
|
||||
hardlink_masters[path] = (item1, item2)
|
||||
|
||||
if has_hardlink_master(item1, hardlink_masters):
|
||||
item1 = hardlink_masters[item1[b'source']][0]
|
||||
|
||||
if has_hardlink_master(item2, hardlink_masters):
|
||||
item2 = hardlink_masters[item2[b'source']][1]
|
||||
|
||||
if get_mode(item1)[0] == 'l' or get_mode(item2)[0] == 'l':
|
||||
changes.append(compare_link(item1, item2))
|
||||
|
||||
if b'chunks' in item1 and b'chunks' in item2:
|
||||
changes.append(compare_content(path, item1, item2))
|
||||
|
||||
if get_mode(item1)[0] == 'd' or get_mode(item2)[0] == 'd':
|
||||
changes.append(compare_directory(item1, item2))
|
||||
|
||||
if not deleted:
|
||||
if item1[b'mode'] != item2[b'mode']:
|
||||
print(remove_surrogates(path), 'different mode')
|
||||
print('\t', args.location.archive, stat.filemode(item1[b'mode']))
|
||||
print('\t', args.archive2, stat.filemode(item2[b'mode']))
|
||||
changes.append(compare_owner(item1, item2))
|
||||
changes.append(compare_mode(item1, item2))
|
||||
|
||||
user1, group1 = get_owner(item1)
|
||||
user2, group2 = get_owner(item2)
|
||||
if user1 != user2 or group1 != group2:
|
||||
print(remove_surrogates(path), 'different owner')
|
||||
print('\t', args.location.archive, 'user=%s, group=%s' % (user1, group1))
|
||||
print('\t', args.archive2, 'user=%s, group=%s' % (user2, group2))
|
||||
|
||||
if not stat.S_ISREG(item1[b'mode']):
|
||||
return
|
||||
if b'chunks' not in item1 or b'chunks' not in item2:
|
||||
# At least one of the items is a link
|
||||
if item1.get(b'source') != item2.get(b'source'):
|
||||
print(remove_surrogates(path), 'different link')
|
||||
print('\t', args.location.archive, item1.get(b'source', '<regular file>'))
|
||||
print('\t', args.archive2, item2.get(b'source', '<regular file>'))
|
||||
return
|
||||
if deleted or not can_compare_chunk_ids or item1[b'chunks'] != item2[b'chunks']:
|
||||
# Contents are different
|
||||
chunk_ids1 = [c[0] for c in item1[b'chunks']]
|
||||
chunk_ids2 = [c[0] for c in item2[b'chunks']]
|
||||
chunk_id_set1 = set(chunk_ids1)
|
||||
chunk_id_set2 = set(chunk_ids2)
|
||||
total1 = None if item1.get(b'deleted') else sum(c[1] for c in item1[b'chunks'])
|
||||
total2 = None if item2.get(b'deleted') else sum(c[1] for c in item2[b'chunks'])
|
||||
if (not can_compare_chunk_ids and total1 == total2 and not deleted and
|
||||
fetch_and_compare_chunks(chunk_ids1, chunk_ids2, archive1, archive2)):
|
||||
return
|
||||
added = sum(c[1] for c in (chunk_id_set2 - chunk_id_set1))
|
||||
removed = sum(c[1] for c in (chunk_id_set1 - chunk_id_set2))
|
||||
print(remove_surrogates(path), 'different contents')
|
||||
print('\t +%s, -%s, %s, %s' % (format_bytes(added), format_bytes(removed),
|
||||
format_bytes(total1), format_bytes(total2)))
|
||||
changes = [x for x in changes if x]
|
||||
if changes:
|
||||
print("{:<19} {}".format(' '.join(changes), remove_surrogates(path)))
|
||||
|
||||
def compare_archives(archive1, archive2, matcher):
|
||||
orphans_archive1 = {}
|
||||
orphans_archive2 = {}
|
||||
hardlink_masters = {}
|
||||
for item1, item2 in zip_longest(
|
||||
archive1.iter_items(lambda item: matcher.match(item[b'path'])),
|
||||
archive2.iter_items(lambda item: matcher.match(item[b'path'])),
|
||||
):
|
||||
if item1 and item2 and item1[b'path'] == item2[b'path']:
|
||||
compare_items(item1[b'path'], item1, item2)
|
||||
compare_items(item1[b'path'], item1, item2, hardlink_masters)
|
||||
continue
|
||||
if item1:
|
||||
matching_orphan = orphans_archive2.pop(item1[b'path'], None)
|
||||
if matching_orphan:
|
||||
compare_items(item1[b'path'], item1, matching_orphan)
|
||||
compare_items(item1[b'path'], item1, matching_orphan, hardlink_masters)
|
||||
else:
|
||||
orphans_archive1[item1[b'path']] = item1
|
||||
if item2:
|
||||
matching_orphan = orphans_archive1.pop(item2[b'path'], None)
|
||||
if matching_orphan:
|
||||
compare_items(item2[b'path'], matching_orphan, item2)
|
||||
compare_items(item2[b'path'], matching_orphan, item2, hardlink_masters)
|
||||
else:
|
||||
orphans_archive2[item2[b'path']] = item2
|
||||
# At this point orphans_* contain items that had no matching partner in the other archive
|
||||
|
@ -520,12 +573,12 @@ def compare_archives(archive1, archive2, matcher):
|
|||
compare_items(added[b'path'], {
|
||||
b'deleted': True,
|
||||
b'chunks': [],
|
||||
}, added, deleted=True)
|
||||
}, added, hardlink_masters, deleted=True)
|
||||
for deleted in orphans_archive1.values():
|
||||
compare_items(deleted[b'path'], deleted, {
|
||||
b'deleted': True,
|
||||
b'chunks': [],
|
||||
}, deleted=True)
|
||||
}, hardlink_masters, deleted=True)
|
||||
|
||||
archive1 = archive
|
||||
archive2 = Archive(repository, key, manifest, args.archive2)
|
||||
|
|
|
@ -1236,33 +1236,101 @@ class DiffArchiverTestCase(ArchiverTestCaseBase):
|
|||
create_regular_file = ArchiverTestCase.create_regular_file
|
||||
|
||||
def test_basic_functionality(self):
|
||||
# Initialize test folder
|
||||
self.create_test_files()
|
||||
self.cmd('init', self.repository_location)
|
||||
os.chmod('input/dir2', stat.S_IFDIR | 0o755)
|
||||
self.create_regular_file('file3', size=1024)
|
||||
|
||||
# Setup files for the first snapshot
|
||||
self.create_regular_file('file_unchanged', size=128)
|
||||
self.create_regular_file('file_removed', size=256)
|
||||
self.create_regular_file('file_removed2', size=512)
|
||||
self.create_regular_file('file_replaced', size=1024)
|
||||
os.mkdir('input/dir_replaced_with_file')
|
||||
os.chmod('input/dir_replaced_with_file', stat.S_IFDIR | 0o755)
|
||||
os.mkdir('input/dir_removed')
|
||||
os.symlink('input/dir_replaced_with_file', 'input/link_changed')
|
||||
os.symlink('input/file_unchanged', 'input/link_removed')
|
||||
os.symlink('input/file_removed2', 'input/link_target_removed')
|
||||
os.symlink('input/empty', 'input/symlink')
|
||||
os.link('input/empty', 'input/hardlink_contents_changed')
|
||||
os.link('input/file_removed', 'input/hardlink_removed')
|
||||
os.link('input/file_removed2', 'input/hardlink_target_removed')
|
||||
os.link('input/file_replaced', 'input/hardlink_target_replaced')
|
||||
|
||||
# Create the first snapshot
|
||||
self.cmd('create', self.repository_location + '::test0', 'input')
|
||||
# replace 'hardlink' with a file
|
||||
os.unlink('input/hardlink')
|
||||
self.create_regular_file('hardlink', size=1024 * 80)
|
||||
# replace directory with a file
|
||||
os.unlink('input/dir2/file2')
|
||||
os.rmdir('input/dir2')
|
||||
self.create_regular_file('dir2', size=1024 * 80)
|
||||
os.chmod('input/dir2', stat.S_IFREG | 0o755)
|
||||
self.create_regular_file('file3', size=1024, contents=b'0')
|
||||
|
||||
# Setup files for the second snapshot
|
||||
self.create_regular_file('file_added', size=2048)
|
||||
os.unlink('input/file_removed')
|
||||
os.unlink('input/file_removed2')
|
||||
os.unlink('input/file_replaced')
|
||||
self.create_regular_file('file_replaced', size=4096, contents=b'0')
|
||||
os.rmdir('input/dir_replaced_with_file')
|
||||
self.create_regular_file('dir_replaced_with_file', size=8192 * 80)
|
||||
os.chmod('input/dir_replaced_with_file', stat.S_IFREG | 0o755)
|
||||
os.mkdir('input/dir_added')
|
||||
os.rmdir('input/dir_removed')
|
||||
os.unlink('input/link_changed')
|
||||
os.symlink('input/dir_added', 'input/link_changed')
|
||||
os.symlink('input/dir_added', 'input/link_added')
|
||||
os.unlink('input/link_removed')
|
||||
os.unlink('input/hardlink_removed')
|
||||
os.link('input/file_added', 'input/hardlink_added')
|
||||
|
||||
with open('input/empty', 'ab') as fd:
|
||||
fd.write(b'appended_data')
|
||||
|
||||
# Create the second snapshot
|
||||
self.cmd('create', self.repository_location + '::test1a', 'input')
|
||||
self.cmd('create', '--chunker-params', '16,18,17,4095', self.repository_location + '::test1b', 'input')
|
||||
|
||||
def do_asserts(output, archive):
|
||||
assert 'input/file3 different contents' in output
|
||||
assert 'input/hardlink different mode' in output
|
||||
assert ('input/hardlink different link\n'
|
||||
' test0 input/file1\n'
|
||||
' test%s <regular file>' % archive) in output
|
||||
assert ('input/dir2 different mode\n'
|
||||
' test0 drwxr-xr-x\n'
|
||||
' test%s -rwxr-xr-x\n' % archive) in output
|
||||
assert 'input/dir2/file2 different contents' in output
|
||||
# File contents changed (deleted and replaced with a new file)
|
||||
assert 'B input/file_replaced' in output
|
||||
|
||||
# File unchanged
|
||||
assert 'input/file_unchanged' not in output
|
||||
|
||||
# Directory replaced with a regular file
|
||||
assert '[drwxr-xr-x -> -rwxr-xr-x] input/dir_replaced_with_file' in output
|
||||
|
||||
# Basic directory cases
|
||||
assert 'added directory input/dir_added' in output
|
||||
assert 'removed directory input/dir_removed' in output
|
||||
|
||||
# Basic symlink cases
|
||||
assert 'changed link input/link_changed' in output
|
||||
assert 'added link input/link_added' in output
|
||||
assert 'removed link input/link_removed' in output
|
||||
|
||||
# Symlink target removed. Should not affect the symlink at all.
|
||||
assert 'input/link_target_removed' not in output
|
||||
|
||||
# The inode has two links and the file contents changed. Borg
|
||||
# should notice the changes in both links.
|
||||
assert '0 B input/empty' in output
|
||||
assert '0 B input/hardlink_contents_changed' in output
|
||||
|
||||
# Added a new file and a hard link to it. Both links to the same
|
||||
# inode should appear as separate files.
|
||||
assert 'added 2.05 kB input/file_added' in output
|
||||
assert 'added 2.05 kB input/hardlink_added' in output
|
||||
|
||||
# The inode has two links and both of them are deleted. They should
|
||||
# appear as two deleted files.
|
||||
assert 'removed 256 B input/file_removed' in output
|
||||
assert 'removed 256 B input/hardlink_removed' in output
|
||||
|
||||
# Another link (marked previously as the source in borg) to the
|
||||
# same inode was removed. This should not change this link at all.
|
||||
assert 'input/hardlink_target_removed' not in output
|
||||
|
||||
# Another link (marked previously as the source in borg) to the
|
||||
# same inode was replaced with a new regular file. This should not
|
||||
# change this link at all.
|
||||
assert 'input/hardlink_target_replaced' not in output
|
||||
|
||||
do_asserts(self.cmd('diff', self.repository_location + '::test0', 'test1a'), '1a')
|
||||
# We expect exit_code=1 due to the chunker params warning
|
||||
do_asserts(self.cmd('diff', self.repository_location + '::test0', 'test1b', exit_code=1), '1b')
|
||||
|
|
|
@ -399,26 +399,18 @@ Examples
|
|||
|
||||
$ cd ..
|
||||
$ borg diff testrepo::archive1 archive2
|
||||
file1 different mode
|
||||
archive1 -rw-r--r--
|
||||
archive2 -rwxr-xr-x
|
||||
file2 different contents
|
||||
+28 B, -31 B, 4.19 MB, 4.19 MB
|
||||
[-rw-r--r-- -> -rwxr-xr-x] file1
|
||||
+135 B -252 B file2
|
||||
|
||||
$ borg diff testrepo::archive2 archive3
|
||||
file3 different contents
|
||||
+0 B, -0 B, 0 B, <deleted>
|
||||
added 0 B file4
|
||||
removed 0 B file3
|
||||
|
||||
$ borg diff testrepo::archive1 archive3
|
||||
file1 different mode
|
||||
archive1 -rw-r--r--
|
||||
archive3 -rwxr-xr-x
|
||||
file2 different contents
|
||||
+28 B, -31 B, 4.19 MB, 4.19 MB
|
||||
file3 different contents
|
||||
+0 B, -0 B, 0 B, <deleted>
|
||||
file4 different contents
|
||||
+0 B, -0 B, <deleted>, 0 B
|
||||
[-rw-r--r-- -> -rwxr-xr-x] file1
|
||||
+135 B -252 B file2
|
||||
added 0 B file4
|
||||
removed 0 B file3
|
||||
|
||||
.. include:: usage/delete.rst.inc
|
||||
|
||||
|
|
Loading…
Reference in a new issue