From 1d3e69e4c7bfbbe943a58d32330ff759afae97ec Mon Sep 17 00:00:00 2001 From: Lauri Niskanen Date: Mon, 21 Mar 2016 14:48:08 +0200 Subject: [PATCH] Improve 'borg diff' output format The main design goals of the new format: - One file takes exactly one line of output - The format is easy to read with typical, long list of changes - Metadata changes are visible and easy to examine - Unuseful information is not shown Resolves #757. --- borg/archiver.py | 147 +++++++++++++++++++++++++------------ borg/testsuite/archiver.py | 108 ++++++++++++++++++++++----- docs/usage.rst | 24 ++---- 3 files changed, 196 insertions(+), 83 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 5868c3082..042a70f7c 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -433,86 +433,139 @@ class Archiver: @with_archive def do_diff(self, args, repository, manifest, key, archive): """Diff contents of two archives""" - def format_bytes(count): - if count is None: - return "" - return format_file_size(count) - def fetch_and_compare_chunks(chunk_ids1, chunk_ids2, archive1, archive2): chunks1 = archive1.pipeline.fetch_many(chunk_ids1) chunks2 = archive2.pipeline.fetch_many(chunk_ids2) return self.compare_chunk_contents(chunks1, chunks2) + def sum_chunk_size(item): + if item.get(b'deleted'): + return None + else: + return sum(c[1] for c in item[b'chunks']) + def get_owner(item): if args.numeric_owner: return item[b'uid'], item[b'gid'] else: return item[b'user'], item[b'group'] - def compare_items(path, item1, item2, deleted=False): + def get_mode(item): + if b'mode' in item: + return stat.filemode(item[b'mode']) + else: + return [None] + + def has_hardlink_master(item, hardlink_masters): + return item.get(b'source') in hardlink_masters and get_mode(item)[0] != 'l' + + def compare_link(item1, item2): + # These are the simple link cases. For special cases, e.g. if a + # regular file is replaced with a link or vice versa, it is + # indicated in compare_mode instead. + if item1.get(b'deleted'): + return 'added link' + elif item2.get(b'deleted'): + return 'removed link' + elif item1[b'source'] != item2[b'source']: + return 'changed link' + + def contents_changed(item1, item2): + if can_compare_chunk_ids: + return item1[b'chunks'] != item2[b'chunks'] + else: + if sum_chunk_size(item1) != sum_chunk_size(item2): + return True + else: + chunk_ids1 = [c[0] for c in item1[b'chunks']] + chunk_ids2 = [c[0] for c in item2[b'chunks']] + return not fetch_and_compare_chunks(chunk_ids1, chunk_ids2, archive1, archive2) + + def compare_content(path, item1, item2): + if contents_changed(item1, item2): + if item1.get(b'deleted'): + return ('added {:>13}'.format(format_file_size(sum_chunk_size(item2)))) + elif item2.get(b'deleted'): + return ('removed {:>11}'.format(format_file_size(sum_chunk_size(item1)))) + else: + chunk_id_set1 = {c[0] for c in item1[b'chunks']} + chunk_id_set2 = {c[0] for c in item2[b'chunks']} + added = sum(c[1] for c in (chunk_id_set2 - chunk_id_set1)) + removed = -sum(c[1] for c in (chunk_id_set1 - chunk_id_set2)) + + return ('{:>9} {:>9}'.format(format_file_size(added, precision=1, sign=True), + format_file_size(removed, precision=1, sign=True))) + + def compare_directory(item1, item2): + if item2.get(b'deleted') and not item1.get(b'deleted'): + return 'removed directory' + elif item1.get(b'deleted') and not item2.get(b'deleted'): + return 'added directory' + + def compare_owner(item1, item2): + user1, group1 = get_owner(item1) + user2, group2 = get_owner(item2) + if user1 != user2 or group1 != group2: + return '[{}:{} -> {}:{}]'.format(user1, group1, user2, group2) + + def compare_mode(item1, item2): + if item1[b'mode'] != item2[b'mode']: + return '[{} -> {}]'.format(get_mode(item1), get_mode(item2)) + + def compare_items(path, item1, item2, hardlink_masters, deleted=False): """ Compare two items with identical paths. :param deleted: Whether one of the items has been deleted """ + changes = [] + + if item1.get(b'hardlink_master') or item2.get(b'hardlink_master'): + hardlink_masters[path] = (item1, item2) + + if has_hardlink_master(item1, hardlink_masters): + item1 = hardlink_masters[item1[b'source']][0] + + if has_hardlink_master(item2, hardlink_masters): + item2 = hardlink_masters[item2[b'source']][1] + + if get_mode(item1)[0] == 'l' or get_mode(item2)[0] == 'l': + changes.append(compare_link(item1, item2)) + + if b'chunks' in item1 and b'chunks' in item2: + changes.append(compare_content(path, item1, item2)) + + if get_mode(item1)[0] == 'd' or get_mode(item2)[0] == 'd': + changes.append(compare_directory(item1, item2)) + if not deleted: - if item1[b'mode'] != item2[b'mode']: - print(remove_surrogates(path), 'different mode') - print('\t', args.location.archive, stat.filemode(item1[b'mode'])) - print('\t', args.archive2, stat.filemode(item2[b'mode'])) + changes.append(compare_owner(item1, item2)) + changes.append(compare_mode(item1, item2)) - user1, group1 = get_owner(item1) - user2, group2 = get_owner(item2) - if user1 != user2 or group1 != group2: - print(remove_surrogates(path), 'different owner') - print('\t', args.location.archive, 'user=%s, group=%s' % (user1, group1)) - print('\t', args.archive2, 'user=%s, group=%s' % (user2, group2)) - - if not stat.S_ISREG(item1[b'mode']): - return - if b'chunks' not in item1 or b'chunks' not in item2: - # At least one of the items is a link - if item1.get(b'source') != item2.get(b'source'): - print(remove_surrogates(path), 'different link') - print('\t', args.location.archive, item1.get(b'source', '')) - print('\t', args.archive2, item2.get(b'source', '')) - return - if deleted or not can_compare_chunk_ids or item1[b'chunks'] != item2[b'chunks']: - # Contents are different - chunk_ids1 = [c[0] for c in item1[b'chunks']] - chunk_ids2 = [c[0] for c in item2[b'chunks']] - chunk_id_set1 = set(chunk_ids1) - chunk_id_set2 = set(chunk_ids2) - total1 = None if item1.get(b'deleted') else sum(c[1] for c in item1[b'chunks']) - total2 = None if item2.get(b'deleted') else sum(c[1] for c in item2[b'chunks']) - if (not can_compare_chunk_ids and total1 == total2 and not deleted and - fetch_and_compare_chunks(chunk_ids1, chunk_ids2, archive1, archive2)): - return - added = sum(c[1] for c in (chunk_id_set2 - chunk_id_set1)) - removed = sum(c[1] for c in (chunk_id_set1 - chunk_id_set2)) - print(remove_surrogates(path), 'different contents') - print('\t +%s, -%s, %s, %s' % (format_bytes(added), format_bytes(removed), - format_bytes(total1), format_bytes(total2))) + changes = [x for x in changes if x] + if changes: + print("{:<19} {}".format(' '.join(changes), remove_surrogates(path))) def compare_archives(archive1, archive2, matcher): orphans_archive1 = {} orphans_archive2 = {} + hardlink_masters = {} for item1, item2 in zip_longest( archive1.iter_items(lambda item: matcher.match(item[b'path'])), archive2.iter_items(lambda item: matcher.match(item[b'path'])), ): if item1 and item2 and item1[b'path'] == item2[b'path']: - compare_items(item1[b'path'], item1, item2) + compare_items(item1[b'path'], item1, item2, hardlink_masters) continue if item1: matching_orphan = orphans_archive2.pop(item1[b'path'], None) if matching_orphan: - compare_items(item1[b'path'], item1, matching_orphan) + compare_items(item1[b'path'], item1, matching_orphan, hardlink_masters) else: orphans_archive1[item1[b'path']] = item1 if item2: matching_orphan = orphans_archive1.pop(item2[b'path'], None) if matching_orphan: - compare_items(item2[b'path'], matching_orphan, item2) + compare_items(item2[b'path'], matching_orphan, item2, hardlink_masters) else: orphans_archive2[item2[b'path']] = item2 # At this point orphans_* contain items that had no matching partner in the other archive @@ -520,12 +573,12 @@ class Archiver: compare_items(added[b'path'], { b'deleted': True, b'chunks': [], - }, added, deleted=True) + }, added, hardlink_masters, deleted=True) for deleted in orphans_archive1.values(): compare_items(deleted[b'path'], deleted, { b'deleted': True, b'chunks': [], - }, deleted=True) + }, hardlink_masters, deleted=True) archive1 = archive archive2 = Archive(repository, key, manifest, args.archive2) diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index 7720bc5dd..5de638f20 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -1236,33 +1236,101 @@ class DiffArchiverTestCase(ArchiverTestCaseBase): create_regular_file = ArchiverTestCase.create_regular_file def test_basic_functionality(self): + # Initialize test folder self.create_test_files() self.cmd('init', self.repository_location) - os.chmod('input/dir2', stat.S_IFDIR | 0o755) - self.create_regular_file('file3', size=1024) + + # Setup files for the first snapshot + self.create_regular_file('file_unchanged', size=128) + self.create_regular_file('file_removed', size=256) + self.create_regular_file('file_removed2', size=512) + self.create_regular_file('file_replaced', size=1024) + os.mkdir('input/dir_replaced_with_file') + os.chmod('input/dir_replaced_with_file', stat.S_IFDIR | 0o755) + os.mkdir('input/dir_removed') + os.symlink('input/dir_replaced_with_file', 'input/link_changed') + os.symlink('input/file_unchanged', 'input/link_removed') + os.symlink('input/file_removed2', 'input/link_target_removed') + os.symlink('input/empty', 'input/symlink') + os.link('input/empty', 'input/hardlink_contents_changed') + os.link('input/file_removed', 'input/hardlink_removed') + os.link('input/file_removed2', 'input/hardlink_target_removed') + os.link('input/file_replaced', 'input/hardlink_target_replaced') + + # Create the first snapshot self.cmd('create', self.repository_location + '::test0', 'input') - # replace 'hardlink' with a file - os.unlink('input/hardlink') - self.create_regular_file('hardlink', size=1024 * 80) - # replace directory with a file - os.unlink('input/dir2/file2') - os.rmdir('input/dir2') - self.create_regular_file('dir2', size=1024 * 80) - os.chmod('input/dir2', stat.S_IFREG | 0o755) - self.create_regular_file('file3', size=1024, contents=b'0') + + # Setup files for the second snapshot + self.create_regular_file('file_added', size=2048) + os.unlink('input/file_removed') + os.unlink('input/file_removed2') + os.unlink('input/file_replaced') + self.create_regular_file('file_replaced', size=4096, contents=b'0') + os.rmdir('input/dir_replaced_with_file') + self.create_regular_file('dir_replaced_with_file', size=8192 * 80) + os.chmod('input/dir_replaced_with_file', stat.S_IFREG | 0o755) + os.mkdir('input/dir_added') + os.rmdir('input/dir_removed') + os.unlink('input/link_changed') + os.symlink('input/dir_added', 'input/link_changed') + os.symlink('input/dir_added', 'input/link_added') + os.unlink('input/link_removed') + os.unlink('input/hardlink_removed') + os.link('input/file_added', 'input/hardlink_added') + + with open('input/empty', 'ab') as fd: + fd.write(b'appended_data') + + # Create the second snapshot self.cmd('create', self.repository_location + '::test1a', 'input') self.cmd('create', '--chunker-params', '16,18,17,4095', self.repository_location + '::test1b', 'input') def do_asserts(output, archive): - assert 'input/file3 different contents' in output - assert 'input/hardlink different mode' in output - assert ('input/hardlink different link\n' - ' test0 input/file1\n' - ' test%s ' % archive) in output - assert ('input/dir2 different mode\n' - ' test0 drwxr-xr-x\n' - ' test%s -rwxr-xr-x\n' % archive) in output - assert 'input/dir2/file2 different contents' in output + # File contents changed (deleted and replaced with a new file) + assert 'B input/file_replaced' in output + + # File unchanged + assert 'input/file_unchanged' not in output + + # Directory replaced with a regular file + assert '[drwxr-xr-x -> -rwxr-xr-x] input/dir_replaced_with_file' in output + + # Basic directory cases + assert 'added directory input/dir_added' in output + assert 'removed directory input/dir_removed' in output + + # Basic symlink cases + assert 'changed link input/link_changed' in output + assert 'added link input/link_added' in output + assert 'removed link input/link_removed' in output + + # Symlink target removed. Should not affect the symlink at all. + assert 'input/link_target_removed' not in output + + # The inode has two links and the file contents changed. Borg + # should notice the changes in both links. + assert '0 B input/empty' in output + assert '0 B input/hardlink_contents_changed' in output + + # Added a new file and a hard link to it. Both links to the same + # inode should appear as separate files. + assert 'added 2.05 kB input/file_added' in output + assert 'added 2.05 kB input/hardlink_added' in output + + # The inode has two links and both of them are deleted. They should + # appear as two deleted files. + assert 'removed 256 B input/file_removed' in output + assert 'removed 256 B input/hardlink_removed' in output + + # Another link (marked previously as the source in borg) to the + # same inode was removed. This should not change this link at all. + assert 'input/hardlink_target_removed' not in output + + # Another link (marked previously as the source in borg) to the + # same inode was replaced with a new regular file. This should not + # change this link at all. + assert 'input/hardlink_target_replaced' not in output + do_asserts(self.cmd('diff', self.repository_location + '::test0', 'test1a'), '1a') # We expect exit_code=1 due to the chunker params warning do_asserts(self.cmd('diff', self.repository_location + '::test0', 'test1b', exit_code=1), '1b') diff --git a/docs/usage.rst b/docs/usage.rst index 70db163d6..a29a56f96 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -399,26 +399,18 @@ Examples $ cd .. $ borg diff testrepo::archive1 archive2 - file1 different mode - archive1 -rw-r--r-- - archive2 -rwxr-xr-x - file2 different contents - +28 B, -31 B, 4.19 MB, 4.19 MB + [-rw-r--r-- -> -rwxr-xr-x] file1 + +135 B -252 B file2 $ borg diff testrepo::archive2 archive3 - file3 different contents - +0 B, -0 B, 0 B, + added 0 B file4 + removed 0 B file3 $ borg diff testrepo::archive1 archive3 - file1 different mode - archive1 -rw-r--r-- - archive3 -rwxr-xr-x - file2 different contents - +28 B, -31 B, 4.19 MB, 4.19 MB - file3 different contents - +0 B, -0 B, 0 B, - file4 different contents - +0 B, -0 B, , 0 B + [-rw-r--r-- -> -rwxr-xr-x] file1 + +135 B -252 B file2 + added 0 B file4 + removed 0 B file3 .. include:: usage/delete.rst.inc