mirror of https://github.com/borgbackup/borg.git
Chunker: yield Chunk namedtuple instead of bytes/memoryview
This commit is contained in:
parent
37d4aee122
commit
8c299696aa
|
@ -4,6 +4,9 @@ API_VERSION = '1.2_01'
|
|||
|
||||
import errno
|
||||
import os
|
||||
from collections import namedtuple
|
||||
|
||||
from .constants import CH_DATA, CH_HOLE
|
||||
|
||||
from libc.stdlib cimport free
|
||||
|
||||
|
@ -26,6 +29,25 @@ cdef extern from "_chunker.c":
|
|||
has_seek_hole = hasattr(os, 'SEEK_DATA') and hasattr(os, 'SEEK_HOLE')
|
||||
|
||||
|
||||
_Chunk = namedtuple('_Chunk', 'meta data')
|
||||
_Chunk.__doc__ = """\
|
||||
Chunk namedtuple
|
||||
|
||||
meta is always a dictionary, data depends on allocation.
|
||||
|
||||
on disk data:
|
||||
meta = {'allocation': CH_DATA, 'size': size_of_data}
|
||||
data = read_data [bytes or memoryview]
|
||||
|
||||
hole in a sparse file:
|
||||
meta = {'allocation': CH_HOLE, 'size': size_of_hole}
|
||||
data = None
|
||||
"""
|
||||
|
||||
def Chunk(data, **meta):
|
||||
return _Chunk(meta, data)
|
||||
|
||||
|
||||
def dread(offset, size, fd=None, fh=-1):
|
||||
use_fh = fh >= 0
|
||||
if use_fh:
|
||||
|
@ -178,15 +200,16 @@ class ChunkerFixed:
|
|||
if is_data:
|
||||
# read block from the range
|
||||
data = dread(offset, wanted, fd, fh)
|
||||
got = len(data)
|
||||
else: # hole
|
||||
# seek over block from the range
|
||||
pos = dseek(wanted, os.SEEK_CUR, fd, fh)
|
||||
data = self.zeros[:pos - offset] # for now, create zero-bytes here
|
||||
got = len(data)
|
||||
data = None
|
||||
got = pos - offset
|
||||
if got > 0:
|
||||
offset += got
|
||||
range_size -= got
|
||||
yield data # later, use a better api that tags data vs. hole
|
||||
yield Chunk(data, size=got, allocation=CH_DATA if is_data else CH_HOLE)
|
||||
if got < wanted:
|
||||
# we did not get enough data, looks like EOF.
|
||||
return
|
||||
|
@ -233,7 +256,8 @@ cdef class Chunker:
|
|||
return self
|
||||
|
||||
def __next__(self):
|
||||
return chunker_process(self.chunker)
|
||||
data = chunker_process(self.chunker)
|
||||
return Chunk(data, size=len(data), allocation=CH_DATA) # no sparse support here
|
||||
|
||||
|
||||
def get_chunker(algo, *params, **kw):
|
||||
|
|
|
@ -75,6 +75,9 @@ CHUNKER_PARAMS = (CH_BUZHASH, CHUNK_MIN_EXP, CHUNK_MAX_EXP, HASH_MASK_BITS, HASH
|
|||
# chunker params for the items metadata stream, finer granularity
|
||||
ITEMS_CHUNKER_PARAMS = (CH_BUZHASH, 15, 19, 17, HASH_WINDOW_SIZE)
|
||||
|
||||
# chunk allocation types: CH_DATA = normal on-disk data, CH_ALLOC = allocated but not written (all zeros), CH_HOLE = not allocated hole (all zeros)
|
||||
CH_DATA, CH_ALLOC, CH_HOLE = 0, 1, 2
|
||||
|
||||
# operating mode of the files cache (for fast skipping of unchanged files)
|
||||
DEFAULT_FILES_CACHE_MODE_UI = 'ctime,size,inode'
|
||||
DEFAULT_FILES_CACHE_MODE = 'cis' # == CacheMode(DEFAULT_FILES_CACHE_MODE_UI)
|
||||
|
|
Loading…
Reference in New Issue