mirror of https://github.com/borgbackup/borg.git
PR #284 - Merge branch 'sparse_files' into merge
This commit is contained in:
commit
b6ed1c742b
|
@ -87,14 +87,14 @@ typedef struct {
|
|||
} Chunker;
|
||||
|
||||
static Chunker *
|
||||
chunker_init(int window_size, int chunk_mask, int min_size, uint32_t seed)
|
||||
chunker_init(int window_size, int chunk_mask, int min_size, int max_size, uint32_t seed)
|
||||
{
|
||||
Chunker *c = calloc(sizeof(Chunker), 1);
|
||||
c->window_size = window_size;
|
||||
c->chunk_mask = chunk_mask;
|
||||
c->min_size = min_size;
|
||||
c->table = buzhash_init_table(seed);
|
||||
c->buf_size = 10 * 1024 * 1024;
|
||||
c->buf_size = max_size;
|
||||
c->data = malloc(c->buf_size);
|
||||
c->read_buf = malloc(c->buf_size);
|
||||
return c;
|
||||
|
|
|
@ -22,9 +22,12 @@ from attic.helpers import Error, uid2user, user2uid, gid2group, group2gid, \
|
|||
|
||||
ITEMS_BUFFER = 1024 * 1024
|
||||
CHUNK_MIN = 1024
|
||||
CHUNK_MAX = 10 * 1024 * 1024
|
||||
WINDOW_SIZE = 0xfff
|
||||
CHUNK_MASK = 0xffff
|
||||
|
||||
ZEROS = b'\0' * CHUNK_MAX
|
||||
|
||||
utime_supports_fd = os.utime in getattr(os, 'supports_fd', {})
|
||||
utime_supports_follow_symlinks = os.utime in getattr(os, 'supports_follow_symlinks', {})
|
||||
has_mtime_ns = sys.version >= '3.3'
|
||||
|
@ -71,7 +74,7 @@ class ChunkBuffer:
|
|||
self.packer = msgpack.Packer(unicode_errors='surrogateescape')
|
||||
self.chunks = []
|
||||
self.key = key
|
||||
self.chunker = Chunker(WINDOW_SIZE, CHUNK_MASK, CHUNK_MIN, self.key.chunk_seed)
|
||||
self.chunker = Chunker(WINDOW_SIZE, CHUNK_MASK, CHUNK_MIN, CHUNK_MAX,self.key.chunk_seed)
|
||||
|
||||
def add(self, item):
|
||||
self.buffer.write(self.packer.pack(StableDict(item)))
|
||||
|
@ -136,7 +139,7 @@ class Archive:
|
|||
self.pipeline = DownloadPipeline(self.repository, self.key)
|
||||
if create:
|
||||
self.items_buffer = CacheChunkBuffer(self.cache, self.key, self.stats)
|
||||
self.chunker = Chunker(WINDOW_SIZE, CHUNK_MASK, CHUNK_MIN, self.key.chunk_seed)
|
||||
self.chunker = Chunker(WINDOW_SIZE, CHUNK_MASK, CHUNK_MIN, CHUNK_MAX, self.key.chunk_seed)
|
||||
if name in manifest.archives:
|
||||
raise self.AlreadyExists(name)
|
||||
self.last_checkpoint = time.time()
|
||||
|
@ -285,7 +288,13 @@ class Archive:
|
|||
with open(path, 'wb') as fd:
|
||||
ids = [c[0] for c in item[b'chunks']]
|
||||
for data in self.pipeline.fetch_many(ids, is_preloaded=True):
|
||||
fd.write(data)
|
||||
if ZEROS.startswith(data):
|
||||
# all-zero chunk: create a hole in a sparse file
|
||||
fd.seek(len(data), 1)
|
||||
else:
|
||||
fd.write(data)
|
||||
pos = fd.tell()
|
||||
fd.truncate(pos)
|
||||
fd.flush()
|
||||
self.restore_attrs(path, item, fd=fd.fileno())
|
||||
elif stat.S_ISFIFO(mode):
|
||||
|
|
|
@ -8,7 +8,7 @@ cdef extern from "_chunker.c":
|
|||
ctypedef int uint32_t
|
||||
ctypedef struct _Chunker "Chunker":
|
||||
pass
|
||||
_Chunker *chunker_init(int window_size, int chunk_mask, int min_size, uint32_t seed)
|
||||
_Chunker *chunker_init(int window_size, int chunk_mask, int min_size, int max_size, uint32_t seed)
|
||||
void chunker_set_fd(_Chunker *chunker, object f, int fd)
|
||||
void chunker_free(_Chunker *chunker)
|
||||
object chunker_process(_Chunker *chunker)
|
||||
|
@ -20,8 +20,8 @@ cdef extern from "_chunker.c":
|
|||
cdef class Chunker:
|
||||
cdef _Chunker *chunker
|
||||
|
||||
def __cinit__(self, window_size, chunk_mask, min_size, seed):
|
||||
self.chunker = chunker_init(window_size, chunk_mask, min_size, seed & 0xffffffff)
|
||||
def __cinit__(self, window_size, chunk_mask, min_size, max_size, seed):
|
||||
self.chunker = chunker_init(window_size, chunk_mask, min_size, max_size, seed & 0xffffffff)
|
||||
|
||||
def chunkify(self, fd, fh=-1):
|
||||
"""
|
||||
|
|
|
@ -11,7 +11,7 @@ import time
|
|||
import unittest
|
||||
from hashlib import sha256
|
||||
from attic import xattr
|
||||
from attic.archive import Archive, ChunkBuffer
|
||||
from attic.archive import Archive, ChunkBuffer, CHUNK_MAX
|
||||
from attic.archiver import Archiver
|
||||
from attic.cache import Cache
|
||||
from attic.crypto import bytes_to_long, num_aes_blocks
|
||||
|
@ -206,6 +206,38 @@ class ArchiverTestCase(ArchiverTestCaseBase):
|
|||
config.write(fd)
|
||||
return Repository(self.repository_path).id
|
||||
|
||||
def test_sparse_file(self):
|
||||
filename = os.path.join(self.input_path, 'sparse')
|
||||
content = b'foobar'
|
||||
hole_size = 5 * CHUNK_MAX # 5 full chunker buffers
|
||||
with open(filename, 'wb') as fd:
|
||||
# create a file that has a hole at the beginning and end
|
||||
fd.seek(hole_size, 1)
|
||||
fd.write(content)
|
||||
fd.seek(hole_size, 1)
|
||||
pos = fd.tell()
|
||||
fd.truncate(pos)
|
||||
total_len = hole_size + len(content) + hole_size
|
||||
st = os.stat(filename)
|
||||
self.assert_equal(st.st_size, total_len)
|
||||
if hasattr(st, 'st_blocks'):
|
||||
self.assert_true(st.st_blocks * 512 < total_len / 10) # is input sparse?
|
||||
self.attic('init', self.repository_location)
|
||||
self.attic('create', self.repository_location + '::test', 'input')
|
||||
with changedir('output'):
|
||||
self.attic('extract', self.repository_location + '::test')
|
||||
self.assert_dirs_equal('input', 'output/input')
|
||||
filename = os.path.join(self.output_path, 'input', 'sparse')
|
||||
with open(filename, 'rb') as fd:
|
||||
# check if file contents are as expected
|
||||
self.assert_equal(fd.read(hole_size), b'\0' * hole_size)
|
||||
self.assert_equal(fd.read(len(content)), content)
|
||||
self.assert_equal(fd.read(hole_size), b'\0' * hole_size)
|
||||
st = os.stat(filename)
|
||||
self.assert_equal(st.st_size, total_len)
|
||||
if hasattr(st, 'st_blocks'):
|
||||
self.assert_true(st.st_blocks * 512 < total_len / 10) # is output sparse?
|
||||
|
||||
def test_repository_swap_detection(self):
|
||||
self.create_test_files()
|
||||
os.environ['ATTIC_PASSPHRASE'] = 'passphrase'
|
||||
|
|
|
@ -1,25 +1,26 @@
|
|||
from attic.chunker import Chunker, buzhash, buzhash_update
|
||||
from attic.testsuite import AtticTestCase
|
||||
from attic.archive import CHUNK_MAX
|
||||
from io import BytesIO
|
||||
|
||||
|
||||
class ChunkerTestCase(AtticTestCase):
|
||||
|
||||
def test_chunkify(self):
|
||||
data = b'0' * 1024 * 1024 * 15 + b'Y'
|
||||
parts = [bytes(c) for c in Chunker(2, 0x3, 2, 0).chunkify(BytesIO(data))]
|
||||
data = b'0' * int(1.5 * CHUNK_MAX) + b'Y'
|
||||
parts = [bytes(c) for c in Chunker(2, 0x3, 2, CHUNK_MAX, 0).chunkify(BytesIO(data))]
|
||||
self.assert_equal(len(parts), 2)
|
||||
self.assert_equal(b''.join(parts), data)
|
||||
self.assert_equal([bytes(c) for c in Chunker(2, 0x3, 2, 0).chunkify(BytesIO(b''))], [])
|
||||
self.assert_equal([bytes(c) for c in Chunker(2, 0x3, 2, 0).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'fooba', b'rboobaz', b'fooba', b'rboobaz', b'fooba', b'rboobaz'])
|
||||
self.assert_equal([bytes(c) for c in Chunker(2, 0x3, 2, 1).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'fo', b'obarb', b'oob', b'azf', b'oobarb', b'oob', b'azf', b'oobarb', b'oobaz'])
|
||||
self.assert_equal([bytes(c) for c in Chunker(2, 0x3, 2, 2).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foob', b'ar', b'boobazfoob', b'ar', b'boobazfoob', b'ar', b'boobaz'])
|
||||
self.assert_equal([bytes(c) for c in Chunker(3, 0x3, 3, 0).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobarboobaz' * 3])
|
||||
self.assert_equal([bytes(c) for c in Chunker(3, 0x3, 3, 1).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobar', b'boo', b'bazfo', b'obar', b'boo', b'bazfo', b'obar', b'boobaz'])
|
||||
self.assert_equal([bytes(c) for c in Chunker(3, 0x3, 3, 2).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foo', b'barboobaz', b'foo', b'barboobaz', b'foo', b'barboobaz'])
|
||||
self.assert_equal([bytes(c) for c in Chunker(3, 0x3, 4, 0).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobarboobaz' * 3])
|
||||
self.assert_equal([bytes(c) for c in Chunker(3, 0x3, 4, 1).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobar', b'boobazfo', b'obar', b'boobazfo', b'obar', b'boobaz'])
|
||||
self.assert_equal([bytes(c) for c in Chunker(3, 0x3, 4, 2).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foob', b'arboobaz', b'foob', b'arboobaz', b'foob', b'arboobaz'])
|
||||
self.assert_equal([bytes(c) for c in Chunker(2, 0x3, 2, CHUNK_MAX, 0).chunkify(BytesIO(b''))], [])
|
||||
self.assert_equal([bytes(c) for c in Chunker(2, 0x3, 2, CHUNK_MAX, 0).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'fooba', b'rboobaz', b'fooba', b'rboobaz', b'fooba', b'rboobaz'])
|
||||
self.assert_equal([bytes(c) for c in Chunker(2, 0x3, 2, CHUNK_MAX, 1).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'fo', b'obarb', b'oob', b'azf', b'oobarb', b'oob', b'azf', b'oobarb', b'oobaz'])
|
||||
self.assert_equal([bytes(c) for c in Chunker(2, 0x3, 2, CHUNK_MAX, 2).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foob', b'ar', b'boobazfoob', b'ar', b'boobazfoob', b'ar', b'boobaz'])
|
||||
self.assert_equal([bytes(c) for c in Chunker(3, 0x3, 3, CHUNK_MAX, 0).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobarboobaz' * 3])
|
||||
self.assert_equal([bytes(c) for c in Chunker(3, 0x3, 3, CHUNK_MAX, 1).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobar', b'boo', b'bazfo', b'obar', b'boo', b'bazfo', b'obar', b'boobaz'])
|
||||
self.assert_equal([bytes(c) for c in Chunker(3, 0x3, 3, CHUNK_MAX, 2).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foo', b'barboobaz', b'foo', b'barboobaz', b'foo', b'barboobaz'])
|
||||
self.assert_equal([bytes(c) for c in Chunker(3, 0x3, 4, CHUNK_MAX, 0).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobarboobaz' * 3])
|
||||
self.assert_equal([bytes(c) for c in Chunker(3, 0x3, 4, CHUNK_MAX, 1).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobar', b'boobazfo', b'obar', b'boobazfo', b'obar', b'boobaz'])
|
||||
self.assert_equal([bytes(c) for c in Chunker(3, 0x3, 4, CHUNK_MAX, 2).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foob', b'arboobaz', b'foob', b'arboobaz', b'foob', b'arboobaz'])
|
||||
|
||||
def test_buzhash(self):
|
||||
self.assert_equal(buzhash(b'abcdefghijklmnop', 0), 3795437769)
|
||||
|
|
Loading…
Reference in New Issue