Chunker: yield Chunk namedtuple instead of bytes/memoryview

This commit is contained in:
Thomas Waldmann 2020-12-14 23:46:04 +01:00
parent 37d4aee122
commit 8c299696aa
2 changed files with 31 additions and 4 deletions

View File

@ -4,6 +4,9 @@ API_VERSION = '1.2_01'
import errno
import os
from collections import namedtuple
from .constants import CH_DATA, CH_HOLE
from libc.stdlib cimport free
@ -26,6 +29,25 @@ cdef extern from "_chunker.c":
has_seek_hole = hasattr(os, 'SEEK_DATA') and hasattr(os, 'SEEK_HOLE')
# Result type yielded by the chunkers: a metadata dict plus the payload.
_Chunk = namedtuple('_Chunk', 'meta data')
_Chunk.__doc__ = """\
Chunk namedtuple
meta is always a dictionary, data depends on allocation.
on disk data:
meta = {'allocation': CH_DATA, 'size': size_of_data}
data = read_data [bytes or memoryview]
hole in a sparse file:
meta = {'allocation': CH_HOLE, 'size': size_of_hole}
data = None
"""


def Chunk(data, **meta):
    """Create a _Chunk namedtuple from *data* and keyword metadata.

    All keyword arguments are collected into the ``meta`` dictionary of the
    returned namedtuple, so callers can write
    ``Chunk(data, size=..., allocation=...)``. *data* is stored unchanged
    (it may be None).
    """
    return _Chunk(meta, data)
def dread(offset, size, fd=None, fh=-1):
use_fh = fh >= 0
if use_fh:
@ -178,15 +200,16 @@ class ChunkerFixed:
if is_data:
# read block from the range
data = dread(offset, wanted, fd, fh)
got = len(data)
else: # hole
# seek over block from the range
pos = dseek(wanted, os.SEEK_CUR, fd, fh)
data = self.zeros[:pos - offset] # for now, create zero-bytes here
got = len(data)
data = None
got = pos - offset
if got > 0:
offset += got
range_size -= got
yield data # later, use a better api that tags data vs. hole
yield Chunk(data, size=got, allocation=CH_DATA if is_data else CH_HOLE)
if got < wanted:
# we did not get enough data, looks like EOF.
return
@ -233,7 +256,8 @@ cdef class Chunker:
return self
def __next__(self):
return chunker_process(self.chunker)
data = chunker_process(self.chunker)
return Chunk(data, size=len(data), allocation=CH_DATA) # no sparse support here
def get_chunker(algo, *params, **kw):

View File

@ -75,6 +75,9 @@ CHUNKER_PARAMS = (CH_BUZHASH, CHUNK_MIN_EXP, CHUNK_MAX_EXP, HASH_MASK_BITS, HASH
# chunker params for the items metadata stream, finer granularity
ITEMS_CHUNKER_PARAMS = (CH_BUZHASH, 15, 19, 17, HASH_WINDOW_SIZE)
# chunk allocation types (descriptions follow the order of the names below):
# CH_DATA:  normal on-disk data
# CH_ALLOC: allocated (but not written, all zeros)
# CH_HOLE:  not allocated hole (all zeros)
CH_DATA, CH_ALLOC, CH_HOLE = 0, 1, 2
# operating mode of the files cache (for fast skipping of unchanged files)
DEFAULT_FILES_CACHE_MODE_UI = 'ctime,size,inode'
DEFAULT_FILES_CACHE_MODE = 'cis' # == CacheMode(DEFAULT_FILES_CACHE_MODE_UI)