Improve 'borg diff' output format

The main design goals of the new format:
 - One file takes exactly one line of output
 - The format is easy to read with typical, long list of changes
 - Metadata changes are visible and easy to examine
 - Unuseful information is not shown

Resolves #757.
This commit is contained in:
Lauri Niskanen 2016-03-21 14:48:08 +02:00
parent c405122753
commit 1d3e69e4c7
3 changed files with 196 additions and 83 deletions

View File

@ -433,86 +433,139 @@ class Archiver:
@with_archive
def do_diff(self, args, repository, manifest, key, archive):
"""Diff contents of two archives"""
def format_bytes(count):
if count is None:
return "<deleted>"
return format_file_size(count)
def fetch_and_compare_chunks(chunk_ids1, chunk_ids2, archive1, archive2):
chunks1 = archive1.pipeline.fetch_many(chunk_ids1)
chunks2 = archive2.pipeline.fetch_many(chunk_ids2)
return self.compare_chunk_contents(chunks1, chunks2)
def sum_chunk_size(item):
if item.get(b'deleted'):
return None
else:
return sum(c[1] for c in item[b'chunks'])
def get_owner(item):
if args.numeric_owner:
return item[b'uid'], item[b'gid']
else:
return item[b'user'], item[b'group']
def compare_items(path, item1, item2, deleted=False):
def get_mode(item):
if b'mode' in item:
return stat.filemode(item[b'mode'])
else:
return [None]
def has_hardlink_master(item, hardlink_masters):
return item.get(b'source') in hardlink_masters and get_mode(item)[0] != 'l'
def compare_link(item1, item2):
# These are the simple link cases. For special cases, e.g. if a
# regular file is replaced with a link or vice versa, it is
# indicated in compare_mode instead.
if item1.get(b'deleted'):
return 'added link'
elif item2.get(b'deleted'):
return 'removed link'
elif item1[b'source'] != item2[b'source']:
return 'changed link'
def contents_changed(item1, item2):
if can_compare_chunk_ids:
return item1[b'chunks'] != item2[b'chunks']
else:
if sum_chunk_size(item1) != sum_chunk_size(item2):
return True
else:
chunk_ids1 = [c[0] for c in item1[b'chunks']]
chunk_ids2 = [c[0] for c in item2[b'chunks']]
return not fetch_and_compare_chunks(chunk_ids1, chunk_ids2, archive1, archive2)
def compare_content(path, item1, item2):
if contents_changed(item1, item2):
if item1.get(b'deleted'):
return ('added {:>13}'.format(format_file_size(sum_chunk_size(item2))))
elif item2.get(b'deleted'):
return ('removed {:>11}'.format(format_file_size(sum_chunk_size(item1))))
else:
chunk_id_set1 = {c[0] for c in item1[b'chunks']}
chunk_id_set2 = {c[0] for c in item2[b'chunks']}
added = sum(c[1] for c in (chunk_id_set2 - chunk_id_set1))
removed = -sum(c[1] for c in (chunk_id_set1 - chunk_id_set2))
return ('{:>9} {:>9}'.format(format_file_size(added, precision=1, sign=True),
format_file_size(removed, precision=1, sign=True)))
def compare_directory(item1, item2):
if item2.get(b'deleted') and not item1.get(b'deleted'):
return 'removed directory'
elif item1.get(b'deleted') and not item2.get(b'deleted'):
return 'added directory'
def compare_owner(item1, item2):
user1, group1 = get_owner(item1)
user2, group2 = get_owner(item2)
if user1 != user2 or group1 != group2:
return '[{}:{} -> {}:{}]'.format(user1, group1, user2, group2)
def compare_mode(item1, item2):
if item1[b'mode'] != item2[b'mode']:
return '[{} -> {}]'.format(get_mode(item1), get_mode(item2))
def compare_items(path, item1, item2, hardlink_masters, deleted=False):
"""
Compare two items with identical paths.
:param deleted: Whether one of the items has been deleted
"""
changes = []
if item1.get(b'hardlink_master') or item2.get(b'hardlink_master'):
hardlink_masters[path] = (item1, item2)
if has_hardlink_master(item1, hardlink_masters):
item1 = hardlink_masters[item1[b'source']][0]
if has_hardlink_master(item2, hardlink_masters):
item2 = hardlink_masters[item2[b'source']][1]
if get_mode(item1)[0] == 'l' or get_mode(item2)[0] == 'l':
changes.append(compare_link(item1, item2))
if b'chunks' in item1 and b'chunks' in item2:
changes.append(compare_content(path, item1, item2))
if get_mode(item1)[0] == 'd' or get_mode(item2)[0] == 'd':
changes.append(compare_directory(item1, item2))
if not deleted:
if item1[b'mode'] != item2[b'mode']:
print(remove_surrogates(path), 'different mode')
print('\t', args.location.archive, stat.filemode(item1[b'mode']))
print('\t', args.archive2, stat.filemode(item2[b'mode']))
changes.append(compare_owner(item1, item2))
changes.append(compare_mode(item1, item2))
user1, group1 = get_owner(item1)
user2, group2 = get_owner(item2)
if user1 != user2 or group1 != group2:
print(remove_surrogates(path), 'different owner')
print('\t', args.location.archive, 'user=%s, group=%s' % (user1, group1))
print('\t', args.archive2, 'user=%s, group=%s' % (user2, group2))
if not stat.S_ISREG(item1[b'mode']):
return
if b'chunks' not in item1 or b'chunks' not in item2:
# At least one of the items is a link
if item1.get(b'source') != item2.get(b'source'):
print(remove_surrogates(path), 'different link')
print('\t', args.location.archive, item1.get(b'source', '<regular file>'))
print('\t', args.archive2, item2.get(b'source', '<regular file>'))
return
if deleted or not can_compare_chunk_ids or item1[b'chunks'] != item2[b'chunks']:
# Contents are different
chunk_ids1 = [c[0] for c in item1[b'chunks']]
chunk_ids2 = [c[0] for c in item2[b'chunks']]
chunk_id_set1 = set(chunk_ids1)
chunk_id_set2 = set(chunk_ids2)
total1 = None if item1.get(b'deleted') else sum(c[1] for c in item1[b'chunks'])
total2 = None if item2.get(b'deleted') else sum(c[1] for c in item2[b'chunks'])
if (not can_compare_chunk_ids and total1 == total2 and not deleted and
fetch_and_compare_chunks(chunk_ids1, chunk_ids2, archive1, archive2)):
return
added = sum(c[1] for c in (chunk_id_set2 - chunk_id_set1))
removed = sum(c[1] for c in (chunk_id_set1 - chunk_id_set2))
print(remove_surrogates(path), 'different contents')
print('\t +%s, -%s, %s, %s' % (format_bytes(added), format_bytes(removed),
format_bytes(total1), format_bytes(total2)))
changes = [x for x in changes if x]
if changes:
print("{:<19} {}".format(' '.join(changes), remove_surrogates(path)))
def compare_archives(archive1, archive2, matcher):
orphans_archive1 = {}
orphans_archive2 = {}
hardlink_masters = {}
for item1, item2 in zip_longest(
archive1.iter_items(lambda item: matcher.match(item[b'path'])),
archive2.iter_items(lambda item: matcher.match(item[b'path'])),
):
if item1 and item2 and item1[b'path'] == item2[b'path']:
compare_items(item1[b'path'], item1, item2)
compare_items(item1[b'path'], item1, item2, hardlink_masters)
continue
if item1:
matching_orphan = orphans_archive2.pop(item1[b'path'], None)
if matching_orphan:
compare_items(item1[b'path'], item1, matching_orphan)
compare_items(item1[b'path'], item1, matching_orphan, hardlink_masters)
else:
orphans_archive1[item1[b'path']] = item1
if item2:
matching_orphan = orphans_archive1.pop(item2[b'path'], None)
if matching_orphan:
compare_items(item2[b'path'], matching_orphan, item2)
compare_items(item2[b'path'], matching_orphan, item2, hardlink_masters)
else:
orphans_archive2[item2[b'path']] = item2
# At this point orphans_* contain items that had no matching partner in the other archive
@ -520,12 +573,12 @@ class Archiver:
compare_items(added[b'path'], {
b'deleted': True,
b'chunks': [],
}, added, deleted=True)
}, added, hardlink_masters, deleted=True)
for deleted in orphans_archive1.values():
compare_items(deleted[b'path'], deleted, {
b'deleted': True,
b'chunks': [],
}, deleted=True)
}, hardlink_masters, deleted=True)
archive1 = archive
archive2 = Archive(repository, key, manifest, args.archive2)

View File

@ -1236,33 +1236,101 @@ class DiffArchiverTestCase(ArchiverTestCaseBase):
create_regular_file = ArchiverTestCase.create_regular_file
def test_basic_functionality(self):
# Initialize test folder
self.create_test_files()
self.cmd('init', self.repository_location)
os.chmod('input/dir2', stat.S_IFDIR | 0o755)
self.create_regular_file('file3', size=1024)
# Setup files for the first snapshot
self.create_regular_file('file_unchanged', size=128)
self.create_regular_file('file_removed', size=256)
self.create_regular_file('file_removed2', size=512)
self.create_regular_file('file_replaced', size=1024)
os.mkdir('input/dir_replaced_with_file')
os.chmod('input/dir_replaced_with_file', stat.S_IFDIR | 0o755)
os.mkdir('input/dir_removed')
os.symlink('input/dir_replaced_with_file', 'input/link_changed')
os.symlink('input/file_unchanged', 'input/link_removed')
os.symlink('input/file_removed2', 'input/link_target_removed')
os.symlink('input/empty', 'input/symlink')
os.link('input/empty', 'input/hardlink_contents_changed')
os.link('input/file_removed', 'input/hardlink_removed')
os.link('input/file_removed2', 'input/hardlink_target_removed')
os.link('input/file_replaced', 'input/hardlink_target_replaced')
# Create the first snapshot
self.cmd('create', self.repository_location + '::test0', 'input')
# replace 'hardlink' with a file
os.unlink('input/hardlink')
self.create_regular_file('hardlink', size=1024 * 80)
# replace directory with a file
os.unlink('input/dir2/file2')
os.rmdir('input/dir2')
self.create_regular_file('dir2', size=1024 * 80)
os.chmod('input/dir2', stat.S_IFREG | 0o755)
self.create_regular_file('file3', size=1024, contents=b'0')
# Setup files for the second snapshot
self.create_regular_file('file_added', size=2048)
os.unlink('input/file_removed')
os.unlink('input/file_removed2')
os.unlink('input/file_replaced')
self.create_regular_file('file_replaced', size=4096, contents=b'0')
os.rmdir('input/dir_replaced_with_file')
self.create_regular_file('dir_replaced_with_file', size=8192 * 80)
os.chmod('input/dir_replaced_with_file', stat.S_IFREG | 0o755)
os.mkdir('input/dir_added')
os.rmdir('input/dir_removed')
os.unlink('input/link_changed')
os.symlink('input/dir_added', 'input/link_changed')
os.symlink('input/dir_added', 'input/link_added')
os.unlink('input/link_removed')
os.unlink('input/hardlink_removed')
os.link('input/file_added', 'input/hardlink_added')
with open('input/empty', 'ab') as fd:
fd.write(b'appended_data')
# Create the second snapshot
self.cmd('create', self.repository_location + '::test1a', 'input')
self.cmd('create', '--chunker-params', '16,18,17,4095', self.repository_location + '::test1b', 'input')
def do_asserts(output, archive):
assert 'input/file3 different contents' in output
assert 'input/hardlink different mode' in output
assert ('input/hardlink different link\n'
' test0 input/file1\n'
' test%s <regular file>' % archive) in output
assert ('input/dir2 different mode\n'
' test0 drwxr-xr-x\n'
' test%s -rwxr-xr-x\n' % archive) in output
assert 'input/dir2/file2 different contents' in output
# File contents changed (deleted and replaced with a new file)
assert 'B input/file_replaced' in output
# File unchanged
assert 'input/file_unchanged' not in output
# Directory replaced with a regular file
assert '[drwxr-xr-x -> -rwxr-xr-x] input/dir_replaced_with_file' in output
# Basic directory cases
assert 'added directory input/dir_added' in output
assert 'removed directory input/dir_removed' in output
# Basic symlink cases
assert 'changed link input/link_changed' in output
assert 'added link input/link_added' in output
assert 'removed link input/link_removed' in output
# Symlink target removed. Should not affect the symlink at all.
assert 'input/link_target_removed' not in output
# The inode has two links and the file contents changed. Borg
# should notice the changes in both links.
assert '0 B input/empty' in output
assert '0 B input/hardlink_contents_changed' in output
# Added a new file and a hard link to it. Both links to the same
# inode should appear as separate files.
assert 'added 2.05 kB input/file_added' in output
assert 'added 2.05 kB input/hardlink_added' in output
# The inode has two links and both of them are deleted. They should
# appear as two deleted files.
assert 'removed 256 B input/file_removed' in output
assert 'removed 256 B input/hardlink_removed' in output
# Another link (marked previously as the source in borg) to the
# same inode was removed. This should not change this link at all.
assert 'input/hardlink_target_removed' not in output
# Another link (marked previously as the source in borg) to the
# same inode was replaced with a new regular file. This should not
# change this link at all.
assert 'input/hardlink_target_replaced' not in output
do_asserts(self.cmd('diff', self.repository_location + '::test0', 'test1a'), '1a')
# We expect exit_code=1 due to the chunker params warning
do_asserts(self.cmd('diff', self.repository_location + '::test0', 'test1b', exit_code=1), '1b')

View File

@ -399,26 +399,18 @@ Examples
$ cd ..
$ borg diff testrepo::archive1 archive2
file1 different mode
archive1 -rw-r--r--
archive2 -rwxr-xr-x
file2 different contents
+28 B, -31 B, 4.19 MB, 4.19 MB
[-rw-r--r-- -> -rwxr-xr-x] file1
+135 B -252 B file2
$ borg diff testrepo::archive2 archive3
file3 different contents
+0 B, -0 B, 0 B, <deleted>
added 0 B file4
removed 0 B file3
$ borg diff testrepo::archive1 archive3
file1 different mode
archive1 -rw-r--r--
archive3 -rwxr-xr-x
file2 different contents
+28 B, -31 B, 4.19 MB, 4.19 MB
file3 different contents
+0 B, -0 B, 0 B, <deleted>
file4 different contents
+0 B, -0 B, <deleted>, 0 B
[-rw-r--r-- -> -rwxr-xr-x] file1
+135 B -252 B file2
added 0 B file4
removed 0 B file3
.. include:: usage/delete.rst.inc