mirror of
https://github.com/borgbackup/borg.git
synced 2025-02-07 15:08:44 +00:00
Merge pull request #559 from ThomasWaldmann/metastream-chunker
finer chunker granularity for items metadata stream, fixes #547, fixes #487
This commit is contained in:
commit
f35ba0b577
2 changed files with 11 additions and 6 deletions
|
@ -34,6 +34,9 @@
|
|||
# defaults, use --chunker-params to override
|
||||
CHUNKER_PARAMS = (CHUNK_MIN_EXP, CHUNK_MAX_EXP, HASH_MASK_BITS, HASH_WINDOW_SIZE)
|
||||
|
||||
# chunker params for the items metadata stream, finer granularity
|
||||
ITEMS_CHUNKER_PARAMS = (12, 16, 14, HASH_WINDOW_SIZE)
|
||||
|
||||
utime_supports_fd = os.utime in getattr(os, 'supports_fd', {})
|
||||
utime_supports_follow_symlinks = os.utime in getattr(os, 'supports_follow_symlinks', {})
|
||||
# Compare the numeric version tuple rather than the version string:
# string comparison is lexicographic, so e.g. '3.10.0' >= '3.3' is False.
has_mtime_ns = sys.version_info >= (3, 3)
|
||||
|
@ -75,7 +78,7 @@ def fetch_many(self, ids, is_preloaded=False):
|
|||
class ChunkBuffer:
|
||||
BUFFER_SIZE = 1 * 1024 * 1024
|
||||
|
||||
def __init__(self, key, chunker_params=CHUNKER_PARAMS):
|
||||
def __init__(self, key, chunker_params=ITEMS_CHUNKER_PARAMS):
|
||||
self.buffer = BytesIO()
|
||||
self.packer = msgpack.Packer(unicode_errors='surrogateescape')
|
||||
self.chunks = []
|
||||
|
@ -110,7 +113,7 @@ def is_full(self):
|
|||
|
||||
class CacheChunkBuffer(ChunkBuffer):
|
||||
|
||||
def __init__(self, cache, key, stats, chunker_params=CHUNKER_PARAMS):
|
||||
def __init__(self, cache, key, stats, chunker_params=ITEMS_CHUNKER_PARAMS):
|
||||
super().__init__(key, chunker_params)
|
||||
self.cache = cache
|
||||
self.stats = stats
|
||||
|
@ -150,7 +153,7 @@ def __init__(self, repository, key, manifest, name, cache=None, create=False,
|
|||
self.end = end
|
||||
self.pipeline = DownloadPipeline(self.repository, self.key)
|
||||
if create:
|
||||
self.items_buffer = CacheChunkBuffer(self.cache, self.key, self.stats, chunker_params)
|
||||
self.items_buffer = CacheChunkBuffer(self.cache, self.key, self.stats)
|
||||
self.chunker = Chunker(self.key.chunk_seed, *chunker_params)
|
||||
if name in manifest.archives:
|
||||
raise self.AlreadyExists(name)
|
||||
|
|
|
@ -190,9 +190,11 @@ Each item represents a file, directory or other fs item and is stored as an
|
|||
it and it is reset every time an inode's metadata is changed.
|
||||
|
||||
All items are serialized using msgpack and the resulting byte stream
|
||||
is fed into the same chunker used for regular file data and turned
|
||||
into deduplicated chunks. The reference to these chunks is then added
|
||||
to the archive metadata.
|
||||
is fed into the same chunker algorithm as used for regular file data
|
||||
and turned into deduplicated chunks. References to these chunks are then added
|
||||
to the archive metadata. To achieve a finer granularity on this metadata
|
||||
stream, we use different chunker params for this chunker, which result in
|
||||
smaller chunks.
|
||||
|
||||
A chunk is stored as an object as well, of course.
|
||||
|
||||
|
|
Loading…
Reference in a new issue