From c394a31d6238ceea84a4605daac2dc2469e284b7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Borgstr=C3=B6m?=
Date: Sun, 16 Feb 2014 23:36:48 +0100
Subject: [PATCH] Improved archive metadata deduplication.

---
 CHANGES                    |  1 +
 attic/archive.py           |  8 ++++----
 attic/helpers.py           |  6 ++++++
 attic/testsuite/helpers.py | 13 ++++++++++++-
 4 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/CHANGES b/CHANGES
index accc3cd8c..a7e036355 100644
--- a/CHANGES
+++ b/CHANGES
@@ -12,6 +12,7 @@ Version 0.11
 - Documentation improvements
 - Fix exception during "attic create" with repeated files (#39)
 - New "--exclude-from" option for attic create/extract/verify.
+- Improved archive metadata deduplication.
 
 Version 0.10
 ------------
diff --git a/attic/archive.py b/attic/archive.py
index 508538bb9..ea05b6e04 100644
--- a/attic/archive.py
+++ b/attic/archive.py
@@ -16,7 +16,7 @@
 from attic.chunker import chunkify
 from attic.hashindex import ChunkIndex
 from attic.helpers import Error, uid2user, user2uid, gid2group, group2gid, \
-    Manifest, Statistics, decode_dict, st_mtime_ns, make_path_safe
+    Manifest, Statistics, decode_dict, st_mtime_ns, make_path_safe, StableDict
 
 ITEMS_BUFFER = 1024 * 1024
 CHUNK_MIN = 1024
@@ -63,7 +63,7 @@ def __init__(self, key):
         self.key = key
 
     def add(self, item):
-        self.buffer.write(self.packer.pack(item))
+        self.buffer.write(self.packer.pack(StableDict(item)))
         if self.is_full():
             self.flush()
 
@@ -348,7 +348,7 @@ def stat_attrs(self, st, path):
             item[b'user'] = item[b'group'] = None
         xattrs = xattr.get_all(path, follow_symlinks=False)
         if xattrs:
-            item[b'xattrs'] = xattrs
+            item[b'xattrs'] = StableDict(xattrs)
         return item
 
     def process_item(self, path, st):
@@ -549,7 +549,7 @@ def missing_chunk_detector(chunk_id):
 
             for state, items in groupby(archive[b'items'], missing_chunk_detector):
                 if state != prev_state:
-                    unpacker = msgpack.Unpacker()
+                    unpacker = msgpack.Unpacker(object_hook=StableDict)
                     prev_state = state
                 if state % 2:
                     self.report_progress('Archive metadata damage detected', error=True)
diff --git a/attic/helpers.py b/attic/helpers.py
index 2f805ebed..6646f785d 100644
--- a/attic/helpers.py
+++ b/attic/helpers.py
@@ -469,6 +469,12 @@ def daemonize():
     os.dup2(fd, 2)
 
 
+class StableDict(dict):
+    """A dict subclass with stable items() ordering"""
+    def items(self):
+        return sorted(super(StableDict, self).items())
+
+
 if sys.version < '3.3':
     # st_mtime_ns attribute only available in 3.3+
     def st_mtime_ns(st):
diff --git a/attic/testsuite/helpers.py b/attic/testsuite/helpers.py
index 0afc4c608..cd6c0abe9 100644
--- a/attic/testsuite/helpers.py
+++ b/attic/testsuite/helpers.py
@@ -1,10 +1,13 @@
+import hashlib
 from time import mktime, strptime
 from datetime import datetime, timezone, timedelta
 import os
 import tempfile
 import unittest
-from attic.helpers import adjust_patterns, exclude_path, Location, format_timedelta, IncludePattern, ExcludePattern, make_path_safe, UpgradableLock, prune_within, prune_split, to_localtime
+from attic.helpers import adjust_patterns, exclude_path, Location, format_timedelta, IncludePattern, ExcludePattern, make_path_safe, UpgradableLock, prune_within, prune_split, to_localtime, \
+    StableDict
 from attic.testsuite import AtticTestCase
+import msgpack
 
 
 class LocationTestCase(AtticTestCase):
@@ -176,3 +179,11 @@ def dotest(test_archives, within, indices):
         dotest(test_archives, '1w', [0, 1, 2, 3, 4, 5])
         dotest(test_archives, '1m', [0, 1, 2, 3, 4, 5])
         dotest(test_archives, '1y', [0, 1, 2, 3, 4, 5])
+
+
+class StableDictTestCase(AtticTestCase):
+
+    def test(self):
+        d = StableDict(foo=1, bar=2, boo=3, baz=4)
+        self.assert_equal(list(d.items()), [('bar', 2), ('baz', 4), ('boo', 3), ('foo', 1)])
+        self.assert_equal(hashlib.md5(msgpack.packb(d)).hexdigest(), 'fc78df42cd60691b3ac3dd2a2b39903f')