1
0
Fork 0
mirror of https://github.com/borgbackup/borg.git synced 2025-02-07 15:08:44 +00:00

Merge pull request #559 from ThomasWaldmann/metastream-chunker

finer chunker granularity for items metadata stream, fixes #547, fixes #487
This commit is contained in:
TW 2016-01-15 22:42:20 +01:00
commit f35ba0b577
2 changed files with 11 additions and 6 deletions

View file

@ -34,6 +34,9 @@
# defaults, use --chunker-params to override
CHUNKER_PARAMS = (CHUNK_MIN_EXP, CHUNK_MAX_EXP, HASH_MASK_BITS, HASH_WINDOW_SIZE)
# chunker params for the items metadata stream, finer granularity.
# The tuple positions mirror CHUNKER_PARAMS:
# (min exp, max exp, hash mask bits, hash window size).
ITEMS_CHUNKER_PARAMS = (12, 16, 14, HASH_WINDOW_SIZE)
# Platform / interpreter capability flags, evaluated once at import time.
# os.supports_fd and os.supports_follow_symlinks may be absent, hence the
# getattr() fallback to an empty container (membership is then False).
utime_supports_fd = os.utime in getattr(os, 'supports_fd', {})
utime_supports_follow_symlinks = os.utime in getattr(os, 'supports_follow_symlinks', {})
# Compare version tuples, not version strings: as strings, '3.10' sorts
# before '3.3', which would wrongly report False on Python 3.10 and later.
has_mtime_ns = sys.version_info >= (3, 3)
@ -75,7 +78,7 @@ def fetch_many(self, ids, is_preloaded=False):
class ChunkBuffer:
BUFFER_SIZE = 1 * 1024 * 1024
def __init__(self, key, chunker_params=CHUNKER_PARAMS):
def __init__(self, key, chunker_params=ITEMS_CHUNKER_PARAMS):
self.buffer = BytesIO()
self.packer = msgpack.Packer(unicode_errors='surrogateescape')
self.chunks = []
@ -110,7 +113,7 @@ def is_full(self):
class CacheChunkBuffer(ChunkBuffer):
def __init__(self, cache, key, stats, chunker_params=CHUNKER_PARAMS):
def __init__(self, cache, key, stats, chunker_params=ITEMS_CHUNKER_PARAMS):
super().__init__(key, chunker_params)
self.cache = cache
self.stats = stats
@ -150,7 +153,7 @@ def __init__(self, repository, key, manifest, name, cache=None, create=False,
self.end = end
self.pipeline = DownloadPipeline(self.repository, self.key)
if create:
self.items_buffer = CacheChunkBuffer(self.cache, self.key, self.stats, chunker_params)
self.items_buffer = CacheChunkBuffer(self.cache, self.key, self.stats)
self.chunker = Chunker(self.key.chunk_seed, *chunker_params)
if name in manifest.archives:
raise self.AlreadyExists(name)

View file

@ -190,9 +190,11 @@ Each item represents a file, directory or other fs item and is stored as an
it and it is reset every time an inode's metadata is changed.
All items are serialized using msgpack and the resulting byte stream
is fed into the same chunker used for regular file data and turned
into deduplicated chunks. The reference to these chunks is then added
to the archive metadata.
is fed into the same chunker algorithm as used for regular file data
and turned into deduplicated chunks. References to these chunks are then added
to the archive metadata. To achieve a finer granularity on this metadata
stream, we use different chunker params for this chunker, which result in
smaller chunks.
A chunk is stored as an object as well, of course.