From b8bb0494f678cd1579e460f5835656a05cf5521a Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 11 Dec 2020 00:34:11 +0100 Subject: [PATCH 1/3] create --sparse, file map support for the "fixed" chunker, see #14 a file map can be: - created internally inside chunkify by calling sparsemap, which uses SEEK_DATA / SEEK_HOLE to determine data and hole ranges inside a seekable sparse file. Usage: borg create --sparse --chunker-params=fixed,BLOCKSIZE ... BLOCKSIZE is the chunker blocksize here, not the filesystem blocksize! - made by some other means and given to the chunkify function. this is not used yet, but in future this could be used to only read the changed parts and seek over the (known) unchanged parts of a file. sparsemap: the generate range sizes are multiples of the fs block size. the tests assume 4kiB fs block size. --- docs/internals/data-structures.rst | 12 +- docs/usage/create.rst | 5 +- src/borg/archive.py | 4 +- src/borg/archiver.py | 4 +- src/borg/chunker.pyx | 187 +++++++++++++++++++++------ src/borg/testsuite/chunker.py | 49 +++++++ src/borg/testsuite/chunker_pytest.py | 120 +++++++++++++++++ 7 files changed, 334 insertions(+), 47 deletions(-) create mode 100644 src/borg/testsuite/chunker_pytest.py diff --git a/docs/internals/data-structures.rst b/docs/internals/data-structures.rst index caaf75810..7d00ac322 100644 --- a/docs/internals/data-structures.rst +++ b/docs/internals/data-structures.rst @@ -596,14 +596,20 @@ The fixed chunker triggers (chunks) at even-spaced offsets, e.g. every 4MiB, producing chunks of same block size (the last chunk is not required to be full-size). -Optionally, it can cut the first "header" chunk with a different size (the -default is not to have a differently sized header chunk). +Optionally, it supports processing a differently sized "header" first, before +it starts to cut chunks of the desired block size. +The default is not to have a differently sized header. 
``borg create --chunker-params fixed,BLOCK_SIZE[,HEADER_SIZE]`` - BLOCK_SIZE: no default value, multiple of the system page size (usually 4096 bytes) recommended. E.g.: 4194304 would cut 4MiB sized chunks. -- HEADER_SIZE: optional, defaults to 0 (no header chunk). +- HEADER_SIZE: optional, defaults to 0 (no header). + +The fixed chunker also supports processing sparse files (reading only the ranges +with data and seeking over the empty hole ranges). + +``borg create --sparse --chunker-params fixed,BLOCK_SIZE[,HEADER_SIZE]`` "buzhash" chunker +++++++++++++++++ diff --git a/docs/usage/create.rst b/docs/usage/create.rst index d8e00df21..4b3966a47 100644 --- a/docs/usage/create.rst +++ b/docs/usage/create.rst @@ -43,7 +43,10 @@ Examples $ borg create --chunker-params buzhash,10,23,16,4095 /path/to/repo::small /smallstuff # Backup a raw device (must not be active/in use/mounted at that time) - $ dd if=/dev/sdx bs=4M | borg create --chunker-params fixed,4194304 /path/to/repo::my-sdx - + $ borg create --read-special --chunker-params fixed,4194304 /path/to/repo::my-sdx /dev/sdX + + # Backup a sparse disk image (must not be active/in use/mounted at that time) + $ borg create --sparse --chunker-params fixed,4194304 /path/to/repo::my-disk my-disk.raw # No compression (none) $ borg create --compression none /path/to/repo::arch ~ diff --git a/src/borg/archive.py b/src/borg/archive.py index 708d78b18..1555536d5 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -1172,7 +1172,7 @@ class FilesystemObjectProcessors: def __init__(self, *, metadata_collector, cache, key, add_item, process_file_chunks, - chunker_params, show_progress): + chunker_params, show_progress, sparse): self.metadata_collector = metadata_collector self.cache = cache self.key = key @@ -1183,7 +1183,7 @@ def __init__(self, *, metadata_collector, cache, key, self.hard_links = {} self.stats = Statistics() # threading: done by cache (including progress) self.cwd = os.getcwd() - self.chunker = 
get_chunker(*chunker_params, seed=key.chunk_seed) + self.chunker = get_chunker(*chunker_params, seed=key.chunk_seed, sparse=sparse) @contextmanager def create_helper(self, path, st, status=None, hardlinkable=True): diff --git a/src/borg/archiver.py b/src/borg/archiver.py index c9704a949..651dcf3b4 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -653,7 +653,7 @@ def create_inner(archive, cache, fso): checkpoint_interval=args.checkpoint_interval, rechunkify=False) fso = FilesystemObjectProcessors(metadata_collector=metadata_collector, cache=cache, key=key, process_file_chunks=cp.process_file_chunks, add_item=archive.add_item, - chunker_params=args.chunker_params, show_progress=args.progress) + chunker_params=args.chunker_params, show_progress=args.progress, sparse=args.sparse) create_inner(archive, cache, fso) else: create_inner(None, None, None) @@ -3354,6 +3354,8 @@ def define_borg_mount(parser): help='deprecated, use ``--noflags`` instead') fs_group.add_argument('--noflags', dest='noflags', action='store_true', help='do not read and store flags (e.g. NODUMP, IMMUTABLE) into archive') + fs_group.add_argument('--sparse', dest='sparse', action='store_true', + help='detect sparse holes in input (supported only by fixed chunker)') fs_group.add_argument('--files-cache', metavar='MODE', dest='files_cache_mode', type=FilesCacheMode, default=DEFAULT_FILES_CACHE_MODE_UI, help='operate files cache in MODE. 
import errno
import os

# True if Python's os module exposes the SEEK_DATA / SEEK_HOLE constants.
# This does not imply that data/hole seeking actually works on a given file,
# because the filesystem also needs to support it.
has_seek_hole = hasattr(os, 'SEEK_DATA') and hasattr(os, 'SEEK_HOLE')


def dread(offset, size, fd=None, fh=-1):
    """
    Read up to <size> bytes from the current position.

    :param offset: current file offset (only used for the fadvise call)
    :param size: maximum number of bytes to read
    :param fd: Python file object (used if fh is not given / not valid)
    :param fh: OS-level file handle; >= 0 means "use the OS-level API"
    :return: the bytes read (may be shorter than <size> at EOF)
    """
    if fh >= 0:
        data = os.read(fh, size)
        if hasattr(os, 'posix_fadvise'):
            # UNIX only and, in case of block sizes that are not a multiple of the
            # system's page size, better be used with a bug fixed linux kernel > 4.6.0,
            # see comment/workaround in _chunker.c and borgbackup issue #907.
            os.posix_fadvise(fh, offset, len(data), os.POSIX_FADV_DONTNEED)
        return data
    return fd.read(size)


def dseek(amount, whence, fd=None, fh=-1):
    """
    Seek in the file given by fh (if valid) or fd, return the new position.
    """
    if fh >= 0:
        return os.lseek(fh, amount, whence)
    return fd.seek(amount, whence)


def dpos_curr_end(fd=None, fh=-1):
    """
    Determine current position and file end position (== file length),
    restoring the original file position afterwards.
    """
    curr = dseek(0, os.SEEK_CUR, fd, fh)
    end = dseek(0, os.SEEK_END, fd, fh)
    dseek(curr, os.SEEK_SET, fd, fh)
    return curr, end


def sparsemap(fd=None, fh=-1):
    """
    Generator yielding a (start, length, is_data) tuple for each range.
    is_data indicates data ranges (True) or hole ranges (False).

    Note:
    the map is generated starting from the current seek position (it
    is not required to be 0 / to be at the start of the file) and works
    from there up to the end of the file.
    When the generator is finished, the file pointer position will be
    reset to where it was before calling this function.
    """
    curr, file_len = dpos_curr_end(fd, fh)
    start = curr
    try:
        whence = os.SEEK_HOLE
        while True:
            is_data = whence == os.SEEK_HOLE  # True: range with data, False: range is a hole
            try:
                end = dseek(start, whence, fd, fh)
            except OSError as e:
                if e.errno == errno.ENXIO:
                    if not is_data and start < file_len:
                        # if there is a hole at the end of a file, we can not find the file end
                        # by SEEK_DATA (because we run into ENXIO), thus we must manually deal
                        # with this case:
                        end = file_len
                        yield (start, end - start, is_data)
                    break
                raise
            # we do not want to yield zero-length ranges with start == end:
            if end > start:
                yield (start, end - start, is_data)
            start = end
            whence = os.SEEK_DATA if is_data else os.SEEK_HOLE
    finally:
        # seek to same position as before calling this function
        dseek(curr, os.SEEK_SET, fd, fh)


class ChunkerFixed:
    """
    This is a simple chunker for input data with data usually staying at same
    offset and / or with known block/record sizes:

    - raw disk images
    - block devices
    - database files with simple header + fixed-size records layout

    It optionally supports:

    - a header block of different size
    - using a sparsemap to only read data ranges and seek over hole ranges
      for sparse files.
    - using an externally given filemap to only read specific ranges from
      a file.

    Note: the last block of a data or hole range may be less than the block size,
    this is supported and not considered to be an error.
    """
    def __init__(self, block_size, header_size=0, sparse=False):
        self.block_size = block_size
        self.header_size = header_size
        # should borg try to do sparse input processing?
        # whether it actually can be done depends on the input file being seekable.
        self.try_sparse = sparse and has_seek_hole
        # reusable all-zero buffer, sliced to represent hole ranges
        self.zeros = memoryview(bytes(block_size))

    def _make_fmap(self, fd, fh):
        """Build a file map: sparse data/hole ranges if enabled and possible, else a "fake" all-data map."""
        if self.try_sparse:
            try:
                if self.header_size > 0:
                    header_map = [(0, self.header_size, True), ]
                    dseek(self.header_size, os.SEEK_SET, fd, fh)
                    body_map = list(sparsemap(fd, fh))
                    dseek(0, os.SEEK_SET, fd, fh)
                else:
                    header_map = []
                    body_map = list(sparsemap(fd, fh))
            except OSError:
                # seeking did not work, fall through to the non-sparse fake fmap
                pass
            else:
                return header_map + body_map
        # either sparse processing (building the fmap) was not tried or it failed.
        # in these cases, we just build a "fake fmap" that considers the whole file
        # as range(s) of data (no holes), so we can use the same code.
        # we build different fmaps here for the purpose of correct block alignment
        # with or without a header block (of potentially different size).
        if self.header_size > 0:
            header_map = [(0, self.header_size, True), ]
            body_map = [(self.header_size, 2 ** 62, True), ]
        else:
            header_map = []
            body_map = [(0, 2 ** 62, True), ]
        return header_map + body_map

    def chunkify(self, fd=None, fh=-1, fmap=None):
        """
        Cut a file into chunks.

        :param fd: Python file object
        :param fh: OS-level file handle (if available),
                   defaults to -1 which means not to use OS-level fd.
        :param fmap: a file map, same format as generated by sparsemap
        """
        if fmap is None:
            fmap = self._make_fmap(fd, fh)
        offset = 0
        for range_start, range_size, is_data in fmap:
            if range_start != offset:
                # this is for the case when the fmap does not cover the file completely,
                # e.g. it could be without the ranges of holes or of unchanged data.
                offset = range_start
                dseek(offset, os.SEEK_SET, fd, fh)
            while range_size:
                wanted = min(range_size, self.block_size)
                if is_data:
                    # read block from the range
                    data = dread(offset, wanted, fd, fh)
                else:  # hole
                    # seek over block from the range
                    pos = dseek(wanted, os.SEEK_CUR, fd, fh)
                    data = self.zeros[:pos - offset]  # for now, create zero-bytes here
                got = len(data)
                if got > 0:
                    offset += got
                    range_size -= got
                    yield data  # later, use a better api that tags data vs. hole
                if got < wanted:
                    # we did not get enough data, looks like EOF.
                    return
from io import BytesIO
import os

import pytest

from ..chunker import ChunkerFixed, sparsemap
from ..constants import *  # NOQA

BS = 4096  # fs block size

# some sparse files. X = content blocks, _ = sparse blocks.
# X__XXX____
map_sparse1 = [
    (0 * BS, 1 * BS, True),
    (1 * BS, 2 * BS, False),
    (3 * BS, 3 * BS, True),
    (6 * BS, 4 * BS, False),
]

# _XX___XXXX
map_sparse2 = [
    (0 * BS, 1 * BS, False),
    (1 * BS, 2 * BS, True),
    (3 * BS, 3 * BS, False),
    (6 * BS, 4 * BS, True),
]

# XXX
map_notsparse = [(0 * BS, 3 * BS, True), ]

# ___
map_onlysparse = [(0 * BS, 3 * BS, False), ]


def make_sparsefile(fname, sparsemap, header_size=0):
    """Create a sparse file at <fname>: write data for data ranges, seek over hole ranges."""
    with open(fname, 'wb') as fd:
        total = 0
        if header_size:
            fd.write(b'H' * header_size)
            total += header_size
        for offset, size, is_data in sparsemap:
            if is_data:
                fd.write(b'X' * size)
            else:
                # seeking past EOF and truncating below creates the hole
                fd.seek(size, os.SEEK_CUR)
            total += size
        fd.truncate(total)
    assert os.path.getsize(fname) == total


def make_content(sparsemap, header_size=0):
    """Return the expected chunkify output: data ranges as 'X's, holes as zero bytes."""
    with BytesIO() as fd:
        total = 0
        if header_size:
            fd.write(b'H' * header_size)
            total += header_size
        for offset, size, is_data in sparsemap:
            if is_data:
                fd.write(b'X' * size)
            else:
                fd.write(b'\0' * size)
            total += size
        content = fd.getvalue()
    assert len(content) == total
    return content


@pytest.mark.parametrize("fname, sparse_map", [
    ('sparse1', map_sparse1),
    ('sparse2', map_sparse2),
    ('onlysparse', map_onlysparse),
    ('notsparse', map_notsparse),
])
def test_sparsemap(tmpdir, fname, sparse_map):

    def get_sparsemap_fh(fname):
        fh = os.open(fname, flags=os.O_RDONLY)
        try:
            return list(sparsemap(fh=fh))
        finally:
            os.close(fh)

    def get_sparsemap_fd(fname):
        with open(fname, 'rb') as fd:
            return list(sparsemap(fd=fd))

    fn = str(tmpdir / fname)
    make_sparsefile(fn, sparse_map)
    assert get_sparsemap_fh(fn) == sparse_map
    assert get_sparsemap_fd(fn) == sparse_map


@pytest.mark.parametrize("fname, sparse_map, header_size, sparse", [
    ('sparse1', map_sparse1, 0, False),
    ('sparse1', map_sparse1, 0, True),
    ('sparse1', map_sparse1, BS, False),
    ('sparse1', map_sparse1, BS, True),
    ('sparse2', map_sparse2, 0, False),
    ('sparse2', map_sparse2, 0, True),
    ('sparse2', map_sparse2, BS, False),
    ('sparse2', map_sparse2, BS, True),
    ('onlysparse', map_onlysparse, 0, False),
    ('onlysparse', map_onlysparse, 0, True),
    ('onlysparse', map_onlysparse, BS, False),
    ('onlysparse', map_onlysparse, BS, True),
    ('notsparse', map_notsparse, 0, False),
    ('notsparse', map_notsparse, 0, True),
    ('notsparse', map_notsparse, BS, False),
    ('notsparse', map_notsparse, BS, True),
])
def test_chunkify_sparse(tmpdir, fname, sparse_map, header_size, sparse):

    def get_chunks(fname, sparse, header_size):
        chunker = ChunkerFixed(4096, header_size=header_size, sparse=sparse)
        with open(fname, 'rb') as fd:
            return b''.join([c for c in chunker.chunkify(fd)])

    fn = str(tmpdir / fname)
    make_sparsefile(fn, sparse_map, header_size=header_size)
    # bug fix: the comparison result was previously discarded (missing assert),
    # so this test could never fail.
    assert get_chunks(fn, sparse=sparse, header_size=header_size) == make_content(sparse_map, header_size=header_size)
+has_seek_hole = hasattr(os, 'SEEK_DATA') and hasattr(os, 'SEEK_HOLE') + + def dread(offset, size, fd=None, fh=-1): use_fh = fh >= 0 if use_fh: @@ -117,7 +123,7 @@ class ChunkerFixed: self.header_size = header_size # should borg try to do sparse input processing? # whether it actually can be done depends on the input file being seekable. - self.try_sparse = sparse and hasattr(os, 'SEEK_DATA') and hasattr(os, 'SEEK_HOLE') + self.try_sparse = sparse and has_seek_hole self.zeros = memoryview(bytes(block_size)) def chunkify(self, fd=None, fh=-1, fmap=None): diff --git a/src/borg/testsuite/archiver.py b/src/borg/testsuite/archiver.py index c978ba280..fbe5931ab 100644 --- a/src/borg/testsuite/archiver.py +++ b/src/borg/testsuite/archiver.py @@ -32,6 +32,7 @@ from ..archive import Archive, ChunkBuffer from ..archiver import Archiver, parse_storage_quota, PURE_PYTHON_MSGPACK_WARNING from ..cache import Cache, LocalCache +from ..chunker import has_seek_hole from ..constants import * # NOQA from ..crypto.low_level import bytes_to_long, num_cipher_blocks from ..crypto.key import KeyfileKeyBase, RepoKey, KeyfileKey, Passphrase, TAMRequiredError @@ -563,7 +564,7 @@ def is_sparse(fn, total_size, hole_size): sparse = True if sparse and hasattr(st, 'st_blocks') and st.st_blocks * 512 >= st.st_size: sparse = False - if sparse and hasattr(os, 'SEEK_HOLE') and hasattr(os, 'SEEK_DATA'): + if sparse and has_seek_hole: with open(fn, 'rb') as fd: # only check if the first hole is as expected, because the 2nd hole check # is problematic on xfs due to its "dynamic speculative EOF preallocation diff --git a/src/borg/testsuite/chunker_pytest.py b/src/borg/testsuite/chunker_pytest.py index 89e120535..7f1d25e50 100644 --- a/src/borg/testsuite/chunker_pytest.py +++ b/src/borg/testsuite/chunker_pytest.py @@ -3,7 +3,7 @@ import pytest -from ..chunker import ChunkerFixed, sparsemap +from ..chunker import ChunkerFixed, sparsemap, has_seek_hole from ..constants import * # NOQA BS = 4096 # fs block 
def fs_supports_sparse():
    """Probe whether the filesystem holding tempfiles really supports SEEK_HOLE/SEEK_DATA."""
    if not has_seek_hole:
        return False
    with tempfile.TemporaryDirectory() as tmpdir:
        probe = os.path.join(tmpdir, 'test_sparse')
        # one hole block followed by one data block
        make_sparsefile(probe, [(0, BS, False), (BS, BS, True)])
        with open(probe, 'rb') as f:
            try:
                hole_at = f.seek(0, os.SEEK_HOLE)
                data_at = f.seek(0, os.SEEK_DATA)
            except OSError:
                # no sparse support if these seeks do not work
                return False
        return hole_at == 0 and data_at == BS
+@pytest.mark.skipif(not fs_supports_sparse(), reason='fs does not support sparse files') @pytest.mark.parametrize("fname, sparse_map, header_size, sparse", [ ('sparse1', map_sparse1, 0, False), ('sparse1', map_sparse1, 0, True),