From 8c299696aa21f6a64eee64663410d7e06e33529f Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 14 Dec 2020 23:46:04 +0100 Subject: [PATCH] Chunker: yield Chunk namedtuple instead of bytes/memoryview --- src/borg/chunker.pyx | 32 ++++++++++++++++++++++++++++---- src/borg/constants.py | 3 +++ 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/src/borg/chunker.pyx b/src/borg/chunker.pyx index 03122ec4b..0b6f66546 100644 --- a/src/borg/chunker.pyx +++ b/src/borg/chunker.pyx @@ -4,6 +4,9 @@ API_VERSION = '1.2_01' import errno import os +from collections import namedtuple + +from .constants import CH_DATA, CH_HOLE from libc.stdlib cimport free @@ -26,6 +29,25 @@ cdef extern from "_chunker.c": has_seek_hole = hasattr(os, 'SEEK_DATA') and hasattr(os, 'SEEK_HOLE') +_Chunk = namedtuple('_Chunk', 'meta data') +_Chunk.__doc__ = """\ + Chunk namedtuple + + meta is always a dictionary, data depends on allocation. + + on disk data: + meta = {'allocation': CH_DATA, 'size': size_of_data} + data = read_data [bytes or memoryview] + + hole in a sparse file: + meta = {'allocation': CH_HOLE, 'size': size_of_hole} + data = None +""" + +def Chunk(data, **meta): + return _Chunk(meta, data) + + def dread(offset, size, fd=None, fh=-1): use_fh = fh >= 0 if use_fh: @@ -178,15 +200,16 @@ class ChunkerFixed: if is_data: # read block from the range data = dread(offset, wanted, fd, fh) + got = len(data) else: # hole # seek over block from the range pos = dseek(wanted, os.SEEK_CUR, fd, fh) - data = self.zeros[:pos - offset] # for now, create zero-bytes here - got = len(data) + data = None + got = pos - offset if got > 0: offset += got range_size -= got - yield data # later, use a better api that tags data vs. hole + yield Chunk(data, size=got, allocation=CH_DATA if is_data else CH_HOLE) if got < wanted: # we did not get enough data, looks like EOF. 
return @@ -233,7 +256,8 @@ cdef class Chunker: return self def __next__(self): - return chunker_process(self.chunker) + data = chunker_process(self.chunker) + return Chunk(data, size=len(data), allocation=CH_DATA) # no sparse support here def get_chunker(algo, *params, **kw): diff --git a/src/borg/constants.py b/src/borg/constants.py index a20719c65..46c2b564c 100644 --- a/src/borg/constants.py +++ b/src/borg/constants.py @@ -75,6 +75,9 @@ CHUNKER_PARAMS = (CH_BUZHASH, CHUNK_MIN_EXP, CHUNK_MAX_EXP, HASH_MASK_BITS, HASH # chunker params for the items metadata stream, finer granularity ITEMS_CHUNKER_PARAMS = (CH_BUZHASH, 15, 19, 17, HASH_WINDOW_SIZE) +# normal on-disk data, allocated (but not written, all zeros), not allocated hole (all zeros) +CH_DATA, CH_ALLOC, CH_HOLE = 0, 1, 2 + # operating mode of the files cache (for fast skipping of unchanged files) DEFAULT_FILES_CACHE_MODE_UI = 'ctime,size,inode' DEFAULT_FILES_CACHE_MODE = 'cis' # == CacheMode(DEFAULT_FILES_CACHE_MODE_UI)