From 9f6df7d999ba51af091869759bcbde4107c8d5cd Mon Sep 17 00:00:00 2001 From: Simon Frei Date: Wed, 19 Jul 2017 12:56:05 +0200 Subject: [PATCH] Only move and change indendation of code - NOT functional --- src/borg/archive.py | 130 ++++++++++++++++++++++++++ src/borg/archiver.py | 216 +------------------------------------------ src/borg/item.pyx | 82 ++++++++++++++++ 3 files changed, 215 insertions(+), 213 deletions(-) diff --git a/src/borg/archive.py b/src/borg/archive.py index b27f48aa1..a3829b661 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -819,6 +819,136 @@ Utilization of max. archive size: {csize_max:.0%} # Was this EPERM due to the O_NOATIME flag? Try again without it: return os.open(path, flags_normal) + def compare_archives(archive1, archive2, matcher): + + def fetch_and_compare_chunks(chunk_ids1, chunk_ids2, archive1, archive2): + chunks1 = archive1.pipeline.fetch_many(chunk_ids1) + chunks2 = archive2.pipeline.fetch_many(chunk_ids2) + return self.compare_chunk_contents(chunks1, chunks2) + + def sum_chunk_size(item, consider_ids=None): + if item.get('deleted'): + size = None + else: + if consider_ids is not None: # consider only specific chunks + size = sum(chunk.size for chunk in item.chunks if chunk.id in consider_ids) + else: # consider all chunks + size = item.get_size() + return size + + def get_owner(item): + if args.numeric_owner: + return item.uid, item.gid + else: + return item.user, item.group + + def get_mode(item): + if 'mode' in item: + return stat.filemode(item.mode) + else: + return [None] + + def hardlink_master_seen(item): + return 'source' not in item or not hardlinkable(item.mode) or item.source in hardlink_masters + + def is_hardlink_master(item): + return item.get('hardlink_master', True) and 'source' not in item + + def update_hardlink_masters(item1, item2): + if is_hardlink_master(item1) or is_hardlink_master(item2): + hardlink_masters[item1.path] = (item1, item2) + + def has_hardlink_master(item, hardlink_masters): + return hardlinkable(item.mode) and item.get('source') in hardlink_masters + + def compare_items(output, path, item1, item2, hardlink_masters, deleted=False): + """ + Compare two items with identical paths. + :param deleted: Whether one of the items has been deleted + """ + changes = [] + + if has_hardlink_master(item1, hardlink_masters): + item1 = hardlink_masters[item1.source][0] + + if has_hardlink_master(item2, hardlink_masters): + item2 = hardlink_masters[item2.source][1] + + if get_mode(item1)[0] == 'l' or get_mode(item2)[0] == 'l': + changes.append(compare_link(item1, item2)) + + if 'chunks' in item1 and 'chunks' in item2: + changes.append(compare_content(path, item1, item2)) + + if get_mode(item1)[0] == 'd' or get_mode(item2)[0] == 'd': + changes.append(compare_directory(item1, item2)) + + if not deleted: + changes.append(compare_owner(item1, item2)) + changes.append(compare_mode(item1, item2)) + + changes = [x for x in changes if x] + if changes: + output_line = (remove_surrogates(path), ' '.join(changes)) + + if args.sort: + output.append(output_line) + else: + print_output(output_line) + + def compare_or_defer(item1, item2): + update_hardlink_masters(item1, item2) + if not hardlink_master_seen(item1) or not hardlink_master_seen(item2): + deferred.append((item1, item2)) + else: + compare_items(output, item1.path, item1, item2, hardlink_masters) + + orphans_archive1 = collections.OrderedDict() + orphans_archive2 = collections.OrderedDict() + deferred = [] + hardlink_masters = {} + output = [] + + for item1, item2 in zip_longest( + archive1.iter_items(lambda item: matcher.match(item.path)), + archive2.iter_items(lambda item: matcher.match(item.path)), + ): + if item1 and item2 and item1.path == item2.path: + compare_or_defer(item1, item2) + continue + if item1: + matching_orphan = orphans_archive2.pop(item1.path, None) + if matching_orphan: + compare_or_defer(item1, matching_orphan) + else: + orphans_archive1[item1.path] = item1 + if item2: + matching_orphan = orphans_archive1.pop(item2.path, None) + if matching_orphan: + compare_or_defer(matching_orphan, item2) + else: + orphans_archive2[item2.path] = item2 + # At this point orphans_* contain items that had no matching partner in the other archive + deleted_item = Item( + deleted=True, + chunks=[], + mode=0, + ) + for added in orphans_archive2.values(): + path = added.path + deleted_item.path = path + update_hardlink_masters(deleted_item, added) + compare_items(output, path, deleted_item, added, hardlink_masters, deleted=True) + for deleted in orphans_archive1.values(): + path = deleted.path + deleted_item.path = path + update_hardlink_masters(deleted, deleted_item) + compare_items(output, path, deleted, deleted_item, hardlink_masters, deleted=True) + for item1, item2 in deferred: + assert hardlink_master_seen(item1) + assert hardlink_master_seen(item2) + compare_items(output, item1.path, item1, item2, hardlink_masters) + class MetadataCollector: def __init__(self, *, noatime, noctime, numeric_owner): diff --git a/src/borg/archiver.py b/src/borg/archiver.py index ebb4b5e1f..1b6ce0324 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -195,33 +195,6 @@ class Archiver: else: logging.getLogger('borg.output.list').info("%1s %s", status, remove_surrogates(path)) - @staticmethod - def compare_chunk_contents(chunks1, chunks2): - """Compare two chunk iterators (like returned by :meth:`.DownloadPipeline.fetch_many`)""" - end = object() - alen = ai = 0 - blen = bi = 0 - while True: - if not alen - ai: - a = next(chunks1, end) - if a is end: - return not blen - bi and next(chunks2, end) is end - a = memoryview(a) - alen = len(a) - ai = 0 - if not blen - bi: - b = next(chunks2, end) - if b is end: - return not alen - ai and next(chunks1, end) is end - b = memoryview(b) - blen = len(b) - bi = 0 - slicelen = min(alen - ai, blen - bi) - if a[ai:ai + slicelen] != b[bi:bi + slicelen]: - return False - ai += slicelen - bi += slicelen - @staticmethod def build_matcher(inclexcl_patterns, include_paths): matcher = PatternMatcher() @@ -966,196 +939,10 @@ class Archiver: @with_archive def do_diff(self, args, repository, manifest, key, archive): """Diff contents of two archives""" - def fetch_and_compare_chunks(chunk_ids1, chunk_ids2, archive1, archive2): - chunks1 = archive1.pipeline.fetch_many(chunk_ids1) - chunks2 = archive2.pipeline.fetch_many(chunk_ids2) - return self.compare_chunk_contents(chunks1, chunks2) - - def sum_chunk_size(item, consider_ids=None): - if item.get('deleted'): - size = None - else: - if consider_ids is not None: # consider only specific chunks - size = sum(chunk.size for chunk in item.chunks if chunk.id in consider_ids) - else: # consider all chunks - size = item.get_size() - return size - - def get_owner(item): - if args.numeric_owner: - return item.uid, item.gid - else: - return item.user, item.group - - def get_mode(item): - if 'mode' in item: - return stat.filemode(item.mode) - else: - return [None] - - def has_hardlink_master(item, hardlink_masters): - return hardlinkable(item.mode) and item.get('source') in hardlink_masters - - def compare_link(item1, item2): - # These are the simple link cases. For special cases, e.g. if a - # regular file is replaced with a link or vice versa, it is - # indicated in compare_mode instead. - if item1.get('deleted'): - return 'added link' - elif item2.get('deleted'): - return 'removed link' - elif 'source' in item1 and 'source' in item2 and item1.source != item2.source: - return 'changed link' - - def contents_changed(item1, item2): - if can_compare_chunk_ids: - return item1.chunks != item2.chunks - else: - if sum_chunk_size(item1) != sum_chunk_size(item2): - return True - else: - chunk_ids1 = [c.id for c in item1.chunks] - chunk_ids2 = [c.id for c in item2.chunks] - return not fetch_and_compare_chunks(chunk_ids1, chunk_ids2, archive1, archive2) - - def compare_content(path, item1, item2): - if contents_changed(item1, item2): - if item1.get('deleted'): - return 'added {:>13}'.format(format_file_size(sum_chunk_size(item2))) - if item2.get('deleted'): - return 'removed {:>11}'.format(format_file_size(sum_chunk_size(item1))) - if not can_compare_chunk_ids: - return 'modified' - chunk_ids1 = {c.id for c in item1.chunks} - chunk_ids2 = {c.id for c in item2.chunks} - added_ids = chunk_ids2 - chunk_ids1 - removed_ids = chunk_ids1 - chunk_ids2 - added = sum_chunk_size(item2, added_ids) - removed = sum_chunk_size(item1, removed_ids) - return '{:>9} {:>9}'.format(format_file_size(added, precision=1, sign=True), - format_file_size(-removed, precision=1, sign=True)) - - def compare_directory(item1, item2): - if item2.get('deleted') and not item1.get('deleted'): - return 'removed directory' - elif item1.get('deleted') and not item2.get('deleted'): - return 'added directory' - - def compare_owner(item1, item2): - user1, group1 = get_owner(item1) - user2, group2 = get_owner(item2) - if user1 != user2 or group1 != group2: - return '[{}:{} -> {}:{}]'.format(user1, group1, user2, group2) - - def compare_mode(item1, item2): - if item1.mode != item2.mode: - return '[{} -> {}]'.format(get_mode(item1), get_mode(item2)) - - def compare_items(output, path, item1, item2, hardlink_masters, deleted=False): - """ - Compare two items with identical paths. - :param deleted: Whether one of the items has been deleted - """ - changes = [] - - if has_hardlink_master(item1, hardlink_masters): - item1 = hardlink_masters[item1.source][0] - - if has_hardlink_master(item2, hardlink_masters): - item2 = hardlink_masters[item2.source][1] - - if get_mode(item1)[0] == 'l' or get_mode(item2)[0] == 'l': - changes.append(compare_link(item1, item2)) - - if 'chunks' in item1 and 'chunks' in item2: - changes.append(compare_content(path, item1, item2)) - - if get_mode(item1)[0] == 'd' or get_mode(item2)[0] == 'd': - changes.append(compare_directory(item1, item2)) - - if not deleted: - changes.append(compare_owner(item1, item2)) - changes.append(compare_mode(item1, item2)) - - changes = [x for x in changes if x] - if changes: - output_line = (remove_surrogates(path), ' '.join(changes)) - - if args.sort: - output.append(output_line) - else: - print_output(output_line) def print_output(line): print("{:<19} {}".format(line[1], line[0])) - def compare_archives(archive1, archive2, matcher): - def hardlink_master_seen(item): - return 'source' not in item or not hardlinkable(item.mode) or item.source in hardlink_masters - - def is_hardlink_master(item): - return item.get('hardlink_master', True) and 'source' not in item - - def update_hardlink_masters(item1, item2): - if is_hardlink_master(item1) or is_hardlink_master(item2): - hardlink_masters[item1.path] = (item1, item2) - - def compare_or_defer(item1, item2): - update_hardlink_masters(item1, item2) - if not hardlink_master_seen(item1) or not hardlink_master_seen(item2): - deferred.append((item1, item2)) - else: - compare_items(output, item1.path, item1, item2, hardlink_masters) - - orphans_archive1 = collections.OrderedDict() - orphans_archive2 = collections.OrderedDict() - deferred = [] - hardlink_masters = {} - output = [] - - for item1, item2 in zip_longest( - archive1.iter_items(lambda item: matcher.match(item.path)), - archive2.iter_items(lambda item: matcher.match(item.path)), - ): - if item1 and item2 and item1.path == item2.path: - compare_or_defer(item1, item2) - continue - if item1: - matching_orphan = orphans_archive2.pop(item1.path, None) - if matching_orphan: - compare_or_defer(item1, matching_orphan) - else: - orphans_archive1[item1.path] = item1 - if item2: - matching_orphan = orphans_archive1.pop(item2.path, None) - if matching_orphan: - compare_or_defer(matching_orphan, item2) - else: - orphans_archive2[item2.path] = item2 - # At this point orphans_* contain items that had no matching partner in the other archive - deleted_item = Item( - deleted=True, - chunks=[], - mode=0, - ) - for added in orphans_archive2.values(): - path = added.path - deleted_item.path = path - update_hardlink_masters(deleted_item, added) - compare_items(output, path, deleted_item, added, hardlink_masters, deleted=True) - for deleted in orphans_archive1.values(): - path = deleted.path - deleted_item.path = path - update_hardlink_masters(deleted, deleted_item) - compare_items(output, path, deleted, deleted_item, hardlink_masters, deleted=True) - for item1, item2 in deferred: - assert hardlink_master_seen(item1) - assert hardlink_master_seen(item2) - compare_items(output, item1.path, item1, item2, hardlink_masters) - - for line in sorted(output): - print_output(line) - archive1 = archive archive2 = Archive(repository, key, manifest, args.archive2, consider_part_files=args.consider_part_files) @@ -1171,6 +958,9 @@ class Archiver: compare_archives(archive1, archive2, matcher) + for line in sorted(output): + print_output(line) + for pattern in matcher.get_unmatched_include_patterns(): self.print_warning("Include pattern '%s' never matched.", pattern) diff --git a/src/borg/item.pyx b/src/borg/item.pyx index f8b67ddd7..1f01bea2a 100644 --- a/src/borg/item.pyx +++ b/src/borg/item.pyx @@ -358,3 +358,85 @@ class ManifestItem(PropDict): timestamp = PropDict._make_property('timestamp', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) config = PropDict._make_property('config', dict) item_keys = PropDict._make_property('item_keys', tuple) + + def compare_link(item1, item2): + # These are the simple link cases. For special cases, e.g. if a + # regular file is replaced with a link or vice versa, it is + # indicated in compare_mode instead. + if item1.get('deleted'): + return 'added link' + elif item2.get('deleted'): + return 'removed link' + elif 'source' in item1 and 'source' in item2 and item1.source != item2.source: + return 'changed link' + +def compare_content(path, item1, item2): + if contents_changed(item1, item2): + if item1.get('deleted'): + return 'added {:>13}'.format(format_file_size(sum_chunk_size(item2))) + if item2.get('deleted'): + return 'removed {:>11}'.format(format_file_size(sum_chunk_size(item1))) + if not can_compare_chunk_ids: + return 'modified' + chunk_ids1 = {c.id for c in item1.chunks} + chunk_ids2 = {c.id for c in item2.chunks} + added_ids = chunk_ids2 - chunk_ids1 + removed_ids = chunk_ids1 - chunk_ids2 + added = sum_chunk_size(item2, added_ids) + removed = sum_chunk_size(item1, removed_ids) + return '{:>9} {:>9}'.format(format_file_size(added, precision=1, sign=True), + format_file_size(-removed, precision=1, sign=True)) + + def compare_directory(item1, item2): + if item2.get('deleted') and not item1.get('deleted'): + return 'removed directory' + elif item1.get('deleted') and not item2.get('deleted'): + return 'added directory' + + def compare_owner(item1, item2): + user1, group1 = get_owner(item1) + user2, group2 = get_owner(item2) + if user1 != user2 or group1 != group2: + return '[{}:{} -> {}:{}]'.format(user1, group1, user2, group2) + + def compare_mode(item1, item2): + if item1.mode != item2.mode: + return '[{} -> {}]'.format(get_mode(item1), get_mode(item2)) + + def contents_changed(item1, item2): + if can_compare_chunk_ids: + return item1.chunks != item2.chunks + else: + if sum_chunk_size(item1) != sum_chunk_size(item2): + return True + else: + chunk_ids1 = [c.id for c in item1.chunks] + chunk_ids2 = [c.id for c in item2.chunks] + return not fetch_and_compare_chunks(chunk_ids1, chunk_ids2, archive1, archive2) + + @staticmethod + def compare_chunk_contents(chunks1, chunks2): + """Compare two chunk iterators (like returned by :meth:`.DownloadPipeline.fetch_many`)""" + end = object() + alen = ai = 0 + blen = bi = 0 + while True: + if not alen - ai: + a = next(chunks1, end) + if a is end: + return not blen - bi and next(chunks2, end) is end + a = memoryview(a) + alen = len(a) + ai = 0 + if not blen - bi: + b = next(chunks2, end) + if b is end: + return not alen - ai and next(chunks1, end) is end + b = memoryview(b) + blen = len(b) + bi = 0 + slicelen = min(alen - ai, blen - bi) + if a[ai:ai + slicelen] != b[bi:bi + slicelen]: + return False + ai += slicelen + bi += slicelen