Chunker: yield Chunk namedtuple instead of bytes/memoryview

2020-12-14 23:46:04 +01:00 · 2020-12-14 23:46:04 +01:00 · 8c299696aa
parent 37d4aee122
commit 8c299696aa
2 changed files with 31 additions and 4 deletions
--- a/src/borg/chunker.pyx
+++ b/src/borg/chunker.pyx
@ -4,6 +4,9 @@ API_VERSION = '1.2_01'

 import errno
 import os
+from collections import namedtuple
+
+from .constants import CH_DATA, CH_HOLE

 from libc.stdlib cimport free

@ -26,6 +29,25 @@ cdef extern from "_chunker.c":
 has_seek_hole = hasattr(os, 'SEEK_DATA') and hasattr(os, 'SEEK_HOLE')


+_Chunk = namedtuple('_Chunk', 'meta data')
+_Chunk.__doc__ = """\
+    Chunk namedtuple
+
+    meta is always a dictionary, data depends on allocation.
+
+    on disk data:
+        meta = {'allocation': CH_DATA, 'size': size_of_data}
+        data = read_data [bytes or memoryview]
+
+    hole in a sparse file:
+        meta = {'allocation': CH_HOLE, 'size': size_of_hole}
+        data = None
+"""
+
+def Chunk(data, **meta):
+    return _Chunk(meta, data)
+
+
 def dread(offset, size, fd=None, fh=-1):
    use_fh = fh >= 0
    if use_fh:
@ -178,15 +200,16 @@ class ChunkerFixed:
                if is_data:
                    # read block from the range
                    data = dread(offset, wanted, fd, fh)
+                    got = len(data)
                else:  # hole
                    # seek over block from the range
                    pos = dseek(wanted, os.SEEK_CUR, fd, fh)
-                    data = self.zeros[:pos - offset]  # for now, create zero-bytes here
-                got = len(data)
+                    data = None
+                    got = pos - offset
                if got > 0:
                    offset += got
                    range_size -= got
-                    yield data  # later, use a better api that tags data vs. hole
+                    yield Chunk(data, size=got, allocation=CH_DATA if is_data else CH_HOLE)
                if got < wanted:
                    # we did not get enough data, looks like EOF.
                    return
@ -233,7 +256,8 @@ cdef class Chunker:
        return self

    def __next__(self):
-        return chunker_process(self.chunker)
+        data = chunker_process(self.chunker)
+        return Chunk(data, size=len(data), allocation=CH_DATA)  # no sparse support here


 def get_chunker(algo, *params, **kw):
--- a/src/borg/constants.py
+++ b/src/borg/constants.py
@ -75,6 +75,9 @@ CHUNKER_PARAMS = (CH_BUZHASH, CHUNK_MIN_EXP, CHUNK_MAX_EXP, HASH_MASK_BITS, HASH
 # chunker params for the items metadata stream, finer granularity
 ITEMS_CHUNKER_PARAMS = (CH_BUZHASH, 15, 19, 17, HASH_WINDOW_SIZE)

+# chunk allocation types: normal on-disk data; allocated but never written (all zeros); unallocated hole (all zeros)
+CH_DATA, CH_ALLOC, CH_HOLE = 0, 1, 2
+
 # operating mode of the files cache (for fast skipping of unchanged files)
 DEFAULT_FILES_CACHE_MODE_UI = 'ctime,size,inode'
 DEFAULT_FILES_CACHE_MODE = 'cis'  # == CacheMode(DEFAULT_FILES_CACHE_MODE_UI)