1
0
Fork 0
mirror of https://github.com/borgbackup/borg.git synced 2025-02-26 07:53:58 +00:00

Improved archive metadata deduplication.

This commit is contained in:
Jonas Borgström 2014-02-16 23:36:48 +01:00
parent f942b070e2
commit c394a31d62
4 changed files with 23 additions and 5 deletions

View file

@ -12,6 +12,7 @@ Version 0.11
- Documentation improvements
- Fix exception during "attic create" with repeated files (#39)
- New "--exclude-from" option for attic create/extract/verify.
- Improved archive metadata deduplication.
Version 0.10
------------

View file

@ -16,7 +16,7 @@
from attic.chunker import chunkify
from attic.hashindex import ChunkIndex
from attic.helpers import Error, uid2user, user2uid, gid2group, group2gid, \
Manifest, Statistics, decode_dict, st_mtime_ns, make_path_safe
Manifest, Statistics, decode_dict, st_mtime_ns, make_path_safe, StableDict
ITEMS_BUFFER = 1024 * 1024
CHUNK_MIN = 1024
@ -63,7 +63,7 @@ def __init__(self, key):
self.key = key
def add(self, item):
self.buffer.write(self.packer.pack(item))
self.buffer.write(self.packer.pack(StableDict(item)))
if self.is_full():
self.flush()
@ -348,7 +348,7 @@ def stat_attrs(self, st, path):
item[b'user'] = item[b'group'] = None
xattrs = xattr.get_all(path, follow_symlinks=False)
if xattrs:
item[b'xattrs'] = xattrs
item[b'xattrs'] = StableDict(xattrs)
return item
def process_item(self, path, st):
@ -549,7 +549,7 @@ def missing_chunk_detector(chunk_id):
for state, items in groupby(archive[b'items'], missing_chunk_detector):
if state != prev_state:
unpacker = msgpack.Unpacker()
unpacker = msgpack.Unpacker(object_hook=StableDict)
prev_state = state
if state % 2:
self.report_progress('Archive metadata damage detected', error=True)

View file

@ -469,6 +469,12 @@ def daemonize():
os.dup2(fd, 2)
class StableDict(dict):
    """dict variant whose items() are handed back in sorted order."""

    def items(self):
        """Return the (key, value) pairs as a list, sorted ascending."""
        pairs = list(super().items())
        pairs.sort()
        return pairs
if sys.version < '3.3':
# st_mtime_ns attribute only available in 3.3+
def st_mtime_ns(st):

View file

@ -1,10 +1,13 @@
import hashlib
from time import mktime, strptime
from datetime import datetime, timezone, timedelta
import os
import tempfile
import unittest
from attic.helpers import adjust_patterns, exclude_path, Location, format_timedelta, IncludePattern, ExcludePattern, make_path_safe, UpgradableLock, prune_within, prune_split, to_localtime
from attic.helpers import adjust_patterns, exclude_path, Location, format_timedelta, IncludePattern, ExcludePattern, make_path_safe, UpgradableLock, prune_within, prune_split, to_localtime, \
StableDict
from attic.testsuite import AtticTestCase
import msgpack
class LocationTestCase(AtticTestCase):
@ -176,3 +179,11 @@ def dotest(test_archives, within, indices):
dotest(test_archives, '1w', [0, 1, 2, 3, 4, 5])
dotest(test_archives, '1m', [0, 1, 2, 3, 4, 5])
dotest(test_archives, '1y', [0, 1, 2, 3, 4, 5])
class StableDictTestCase(AtticTestCase):
    """Checks StableDict's items() ordering and the bytes msgpack packs it to."""

    def test(self):
        stable = StableDict(foo=1, bar=2, boo=3, baz=4)
        expected_pairs = [('bar', 2), ('baz', 4), ('boo', 3), ('foo', 1)]
        self.assert_equal(list(stable.items()), expected_pairs)
        # Comparing an MD5 digest of the packed form pins the exact serialized bytes.
        packed = msgpack.packb(stable)
        digest = hashlib.md5(packed).hexdigest()
        self.assert_equal(digest, 'fc78df42cd60691b3ac3dd2a2b39903f')