mirror of https://github.com/borgbackup/borg.git
synced 2024-12-26 01:37:20 +00:00

Merge pull request #1458 from ThomasWaldmann/merge-1.0-maint

Merge 1.0 maint

This commit is contained in: commit f8b2ffe999

8 changed files with 134 additions and 56 deletions
CHANGES

@@ -130,6 +130,14 @@ Security fixes:

+- fix security issue with remote repository access, #1428
+
+Bug fixes:
+
+- do not write objects to repository that are bigger than the allowed size,
+  borg will reject reading them, #1451.
+  IMPORTANT: if you created archives with many millions of files or
+  directories, please verify if you can open them successfully,
+  e.g. try a "borg list REPO::ARCHIVE".

 Version 1.0.7rc1 (2016-08-05)
 -----------------------------
docs/usage/debug-dump-repo-objs.rst.inc (new file, 38 lines)
@@ -0,0 +1,38 @@
+.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
+
+.. _borg_debug-dump-repo-objs:
+
+borg debug-dump-repo-objs
+-------------------------
+::
+
+    usage: borg debug-dump-repo-objs [-h] [--critical] [--error] [--warning]
+                                     [--info] [--debug] [--lock-wait N]
+                                     [--show-rc] [--no-files-cache] [--umask M]
+                                     [--remote-path PATH]
+                                     REPOSITORY
+
+    dump (decrypted, decompressed) repo objects
+
+    positional arguments:
+      REPOSITORY          repo to dump
+
+    optional arguments:
+      -h, --help          show this help message and exit
+      --critical          work on log level CRITICAL
+      --error             work on log level ERROR
+      --warning           work on log level WARNING (default)
+      --info, -v, --verbose
+                          work on log level INFO
+      --debug             work on log level DEBUG
+      --lock-wait N       wait for the lock, but max. N seconds (default: 1).
+      --show-rc           show/log the return code (rc)
+      --no-files-cache    do not load/update the file metadata cache used to
+                          detect unchanged files
+      --umask M           set umask to M (local and remote, default: 0077)
+      --remote-path PATH  set remote path to executable (default: "borg")
+
+Description
+~~~~~~~~~~~
+
+This command dumps raw (but decrypted and decompressed) repo objects to files.
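For example, pointing the command at a repository (the path here is hypothetical; REPOSITORY is the only required argument per the usage above)::

    borg debug-dump-repo-objs /path/to/repo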
src/borg/compress.pyx

@@ -1,3 +1,4 @@
+import threading
 import zlib
 try:
     import lzma
@@ -7,6 +8,18 @@ except ImportError:
 cdef extern from "lz4.h":
     int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) nogil
     int LZ4_decompress_safe(const char* source, char* dest, int inputSize, int maxOutputSize) nogil
+    int LZ4_compressBound(int inputSize) nogil
+
+
+thread_local = threading.local()
+thread_local.buffer = bytes()
+
+
+cdef char *get_buffer(size):
+    size = int(size)
+    if len(thread_local.buffer) < size:
+        thread_local.buffer = bytes(size)
+    return <char *> thread_local.buffer


 cdef class CompressorBase:
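The new get_buffer() helper gives each thread its own lazily-grown scratch buffer, which is what lets the LZ4 wrapper below drop its caller-supplied buffer and its one-instance-per-thread restriction. A minimal pure-Python sketch of the same pattern (names are illustrative, not borg's API)::

    import threading

    _local = threading.local()

    def get_scratch_buffer(size):
        """Return a per-thread scratch buffer of at least `size` bytes.

        The buffer only ever grows, so after warm-up, calls with
        typical chunk sizes cause no reallocation.
        """
        buf = getattr(_local, 'buffer', None)
        if buf is None or len(buf) < size:
            buf = _local.buffer = bytearray(size)
        return buf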
@@ -52,40 +65,30 @@ class CNONE(CompressorBase):
         return data


-cdef class LZ4(CompressorBase):
+class LZ4(CompressorBase):
     """
     raw LZ4 compression / decompression (liblz4).

     Features:
         - lz4 is super fast
         - wrapper releases CPython's GIL to support multithreaded code
-        - buffer given by caller, avoiding frequent reallocation and buffer duplication
         - uses safe lz4 methods that never go beyond the end of the output buffer
-
-    But beware:
-        - this is not very generic, the given buffer MUST be large enough to
-          handle all compression or decompression output (or it will fail).
-        - you must not do method calls to the same LZ4 instance from different
-          threads at the same time - create one LZ4 instance per thread!
     """
     ID = b'\x01\x00'
     name = 'lz4'

-    cdef char *buffer  # helper buffer for (de)compression output
-    cdef int bufsize   # size of this buffer
-
-    def __cinit__(self, **kwargs):
-        buffer = kwargs['buffer']
-        self.buffer = buffer
-        self.bufsize = len(buffer)
+    def __init__(self, **kwargs):
+        pass

     def compress(self, idata):
         if not isinstance(idata, bytes):
             idata = bytes(idata)  # code below does not work with memoryview
         cdef int isize = len(idata)
-        cdef int osize = self.bufsize
+        cdef int osize
         cdef char *source = idata
-        cdef char *dest = self.buffer
+        cdef char *dest
+        osize = LZ4_compressBound(isize)
+        dest = get_buffer(osize)
         with nogil:
             osize = LZ4_compress_limitedOutput(source, dest, isize, osize)
         if not osize:
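Because the scratch buffer is now per-thread, the old docstring's warning about sharing one LZ4 instance across threads could be dropped. A small usage sketch (the get_compressor import path follows the test suite; treat the example as illustrative)::

    from concurrent.futures import ThreadPoolExecutor

    from borg.compress import get_compressor

    lz4 = get_compressor('lz4')
    chunks = [b'fooooooooobaaaaaaaar' * 1000 for _ in range(16)]
    # one shared compressor, several threads: each thread transparently
    # gets its own scratch buffer via get_buffer()
    with ThreadPoolExecutor(max_workers=4) as pool:
        compressed = list(pool.map(lz4.compress, chunks))
    assert [lz4.decompress(c) for c in compressed] == chunks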
@@ -97,15 +100,25 @@ cdef class LZ4(CompressorBase):
             idata = bytes(idata)  # code below does not work with memoryview
         idata = super().decompress(idata)
         cdef int isize = len(idata)
-        cdef int osize = self.bufsize
+        cdef int osize
+        cdef int rsize
         cdef char *source = idata
-        cdef char *dest = self.buffer
-        with nogil:
-            osize = LZ4_decompress_safe(source, dest, isize, osize)
-        if osize < 0:
-            # malformed input data, buffer too small, ...
-            raise Exception('lz4 decompress failed')
-        return dest[:osize]
+        cdef char *dest
+        # a bit more than 8MB is enough for the usual data sizes yielded by the chunker.
+        # allocate more if isize * 3 is already bigger, to avoid having to resize often.
+        osize = max(int(1.1 * 2**23), isize * 3)
+        while True:
+            dest = get_buffer(osize)
+            with nogil:
+                rsize = LZ4_decompress_safe(source, dest, isize, osize)
+            if rsize >= 0:
+                break
+            if osize > 2 ** 30:
+                # this is insane, get out of here
+                raise Exception('lz4 decompress failed')
+            # likely the buffer was too small, get a bigger one:
+            osize = int(1.5 * osize)
+        return dest[:rsize]


 class LZMA(CompressorBase):
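The grow-and-retry loop is the standard way to drive C decompression APIs that only signal "output buffer too small" as a generic failure: start from a generous estimate, grow geometrically, and stop at a hard cap. The control flow in pure Python, with a stand-in for LZ4_decompress_safe (a hypothetical callable, not liblz4's real binding)::

    def decompress_with_retry(source, try_decompress):
        """try_decompress(source, bufsize) returns bytes on success or
        None when the output buffer was too small (standing in for a
        negative LZ4_decompress_safe return value)."""
        # a bit above 8 MiB, or 3x the input, to avoid frequent resizing
        osize = max(int(1.1 * 2**23), len(source) * 3)
        while True:
            result = try_decompress(source, osize)
            if result is not None:
                return result
            if osize > 2**30:
                # hard cap: fail instead of growing without bound
                raise Exception('lz4 decompress failed')
            osize = int(1.5 * osize)  # likely too small, grow and retry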
@@ -192,8 +205,3 @@ class Compressor:
             return cls(**self.params).decompress(data)
         else:
             raise ValueError('No decompressor for this data found: %r.', data[:2])
-
-
-# a buffer used for (de)compression result, which can be slightly bigger
-# than the chunk buffer in the worst (incompressible data) case, add 10%:
-COMPR_BUFFER = bytes(int(1.1 * 2 ** 23))  # CHUNK_MAX_EXP == 23
src/borg/helpers.py

@@ -38,7 +38,7 @@
 from . import hashindex
 from . import shellpattern
 from .constants import *  # NOQA
-from .compress import COMPR_BUFFER, get_compressor
+from .compress import get_compressor

 # meta dict, data bytes
 _Chunk = namedtuple('_Chunk', 'meta data')
@@ -470,8 +470,6 @@ def ChunkerParams(s):
         return CHUNKER_PARAMS
     chunk_min, chunk_max, chunk_mask, window_size = s.split(',')
     if int(chunk_max) > 23:
-        # do not go beyond 2**23 (8MB) chunk size now,
-        # COMPR_BUFFER can only cope with up to this size
         raise ValueError('max. chunk size exponent must not be more than 23 (2^23 = 8MiB max. chunk size)')
     return int(chunk_min), int(chunk_max), int(chunk_mask), int(window_size)
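The 2^23 cap on the max chunk size exponent stays in place even though its COMPR_BUFFER rationale is gone. For illustration (example values, not borg's defaults)::

    # parses the comma-separated chunker spec into a 4-tuple of ints
    assert ChunkerParams('10,23,16,4095') == (10, 23, 16, 4095)

    try:
        ChunkerParams('10,24,16,4095')  # exponent 24 > 23 is rejected
    except ValueError as exc:
        print(exc)  # max. chunk size exponent must not be more than 23 ...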
@@ -1538,16 +1536,14 @@ def decide(self, chunk):
         # if we compress the data here to decide, we can even update the chunk data
         # and modify the metadata as desired.
         compr_spec = chunk.meta.get('compress', self.compression)
-        compr_args = dict(buffer=COMPR_BUFFER)
-        compr_args.update(compr_spec)
-        if compr_args['name'] == 'auto':
+        if compr_spec['name'] == 'auto':
             # we did not decide yet, use heuristic:
-            compr_args, chunk = self.heuristic_lz4(compr_args, chunk)
-        return compr_args, chunk
+            compr_spec, chunk = self.heuristic_lz4(compr_spec, chunk)
+        return compr_spec, chunk

     def heuristic_lz4(self, compr_args, chunk):
         meta, data = chunk
-        lz4 = get_compressor('lz4', buffer=compr_args['buffer'])
+        lz4 = get_compressor('lz4')
         cdata = lz4.compress(data)
         data_len = len(data)
         cdata_len = len(cdata)
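heuristic_lz4 trial-compresses the chunk with lz4, now cheap to set up since no buffer has to be wired through, and compares data_len against cdata_len to pick the real compressor. The decision itself is cut off in this hunk; a hedged sketch of the general idea (the ratio test and the spec substitution are assumptions, not borg's exact code)::

    def heuristic_lz4_sketch(compr_spec, chunk):
        meta, data = chunk
        lz4 = get_compressor('lz4')
        cdata = lz4.compress(data)        # fast trial compression
        if len(cdata) < len(data):
            # compressible: continue with the compressor configured
            # behind the 'auto' spec (exact handling assumed)
            return compr_spec, chunk
        # incompressible: don't waste CPU, store uncompressed
        return dict(name='none'), chunk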
src/borg/key.py

@@ -13,7 +13,7 @@
 logger = create_logger()

 from .constants import *  # NOQA
-from .compress import Compressor, COMPR_BUFFER, get_compressor
+from .compress import Compressor, get_compressor
 from .crypto import AES, bytes_to_long, long_to_bytes, bytes_to_int, num_aes_blocks, hmac_sha256
 from .helpers import Chunk
 from .helpers import Error, IntegrityError
@@ -89,7 +89,7 @@ def __init__(self, repository):
         self.repository = repository
         self.target = None  # key location file path / repo obj
         self.compression_decider2 = CompressionDecider2(CompressionSpec('none'))
-        self.compressor = Compressor('none', buffer=COMPR_BUFFER)  # for decompression
+        self.compressor = Compressor('none')  # for decompression

     def id_hash(self, data):
         """Return HMAC hash using the "id" HMAC key
src/borg/repository.py

@@ -909,9 +909,14 @@ def _read(self, fd, fmt, header, segment, offset, acceptable_tags, read_data=True):
             key = None
         else:
             raise TypeError("_read called with unsupported format")
-        if size > MAX_OBJECT_SIZE or size < fmt.size:
-            raise IntegrityError('Invalid segment entry size [segment {}, offset {}]'.format(
-                segment, offset))
+        if size > MAX_OBJECT_SIZE:
+            # if you get this on an archive made with borg < 1.0.7 and millions of files and
+            # you need to restore it, you can disable this check by using "if False:" above.
+            raise IntegrityError('Invalid segment entry size {} - too big [segment {}, offset {}]'.format(
+                size, segment, offset))
+        if size < fmt.size:
+            raise IntegrityError('Invalid segment entry size {} - too small [segment {}, offset {}]'.format(
+                size, segment, offset))
         length = size - fmt.size
         if read_data:
             data = fd.read(length)
@@ -942,8 +947,12 @@ def _read(self, fd, fmt, header, segment, offset, acceptable_tags, read_data=True):
         return size, tag, key, data

     def write_put(self, id, data, raise_full=False):
+        data_size = len(data)
+        if data_size > MAX_DATA_SIZE:
+            # this would push the segment entry size beyond MAX_OBJECT_SIZE.
+            raise IntegrityError('More than allowed put data [{} > {}]'.format(data_size, MAX_DATA_SIZE))
         fd = self.get_write_fd(raise_full=raise_full)
-        size = len(data) + self.put_header_fmt.size
+        size = data_size + self.put_header_fmt.size
         offset = self.offset
         header = self.header_no_crc_fmt.pack(size, TAG_PUT)
         crc = self.crc_fmt.pack(crc32(data, crc32(id, crc32(header))) & 0xffffffff)
@@ -972,3 +981,6 @@ def write_commit(self, intermediate=False):
         crc = self.crc_fmt.pack(crc32(header) & 0xffffffff)
         fd.write(b''.join((crc, header)))
         self.close_segment()
+
+
+MAX_DATA_SIZE = MAX_OBJECT_SIZE - LoggedIO.put_header_fmt.size
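Deriving MAX_DATA_SIZE from MAX_OBJECT_SIZE and the put header size keeps the write_put() check and the read-side _read() check consistent by construction. A worked example with assumed values (a 20 MiB MAX_OBJECT_SIZE and a crc32/size/tag/32-byte-key put header; neither value is shown in this diff)::

    import struct

    MAX_OBJECT_SIZE = 20 * 1024 * 1024         # assumed cap, 20 MiB
    put_header_fmt = struct.Struct('<IIB32s')  # assumed: crc32, size, tag, key

    MAX_DATA_SIZE = MAX_OBJECT_SIZE - put_header_fmt.size
    print(put_header_fmt.size, MAX_DATA_SIZE)  # 41 20971479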
src/borg/testsuite/compress.py

@@ -1,3 +1,4 @@
+import os
 import zlib
 try:
     import lzma
@@ -11,13 +12,13 @@

-buffer = bytes(2**16)
 data = b'fooooooooobaaaaaaaar' * 10
-params = dict(name='zlib', level=6, buffer=buffer)
+params = dict(name='zlib', level=6)


 def test_get_compressor():
     c = get_compressor(name='none')
     assert isinstance(c, CNONE)
-    c = get_compressor(name='lz4', buffer=buffer)
+    c = get_compressor(name='lz4')
     assert isinstance(c, LZ4)
     c = get_compressor(name='zlib')
     assert isinstance(c, ZLIB)
@@ -35,13 +36,21 @@ def test_cnull():


 def test_lz4():
-    c = get_compressor(name='lz4', buffer=buffer)
+    c = get_compressor(name='lz4')
     cdata = c.compress(data)
     assert len(cdata) < len(data)
     assert data == c.decompress(cdata)
     assert data == Compressor(**params).decompress(cdata)  # autodetect


+def test_lz4_buffer_allocation():
+    # test with a rather huge data object to see if buffer allocation / resizing works
+    data = os.urandom(50 * 2**20)  # 50MiB incompressible data
+    c = get_compressor(name='lz4')
+    cdata = c.compress(data)
+    assert data == c.decompress(cdata)
+
+
 def test_zlib():
     c = get_compressor(name='zlib')
     cdata = c.compress(data)
@@ -83,16 +92,16 @@ def test_zlib_compat():

 def test_compressor():
     params_list = [
-        dict(name='none', buffer=buffer),
-        dict(name='lz4', buffer=buffer),
-        dict(name='zlib', level=0, buffer=buffer),
-        dict(name='zlib', level=6, buffer=buffer),
-        dict(name='zlib', level=9, buffer=buffer),
+        dict(name='none'),
+        dict(name='lz4'),
+        dict(name='zlib', level=0),
+        dict(name='zlib', level=6),
+        dict(name='zlib', level=9),
     ]
     if lzma:
         params_list += [
-            dict(name='lzma', level=0, buffer=buffer),
-            dict(name='lzma', level=6, buffer=buffer),
+            dict(name='lzma', level=0),
+            dict(name='lzma', level=6),
             # we do not test lzma on level 9 because of the huge memory needs
         ]
     for params in params_list:
src/borg/testsuite/repository.py

@@ -13,7 +13,7 @@
 from ..helpers import IntegrityError
 from ..locking import Lock, LockFailed
 from ..remote import RemoteRepository, InvalidRPCMethod, ConnectionClosedWithHint, handle_remote_line
-from ..repository import Repository, LoggedIO, MAGIC
+from ..repository import Repository, LoggedIO, MAGIC, MAX_DATA_SIZE
 from . import BaseTestCase
@@ -142,6 +142,13 @@ def test_list(self):
         self.assert_equal(second_half, all[50:])
         self.assert_equal(len(self.repository.list(limit=50)), 50)

+    def test_max_data_size(self):
+        max_data = b'x' * MAX_DATA_SIZE
+        self.repository.put(b'00000000000000000000000000000000', max_data)
+        self.assert_equal(self.repository.get(b'00000000000000000000000000000000'), max_data)
+        self.assert_raises(IntegrityError,
+                           lambda: self.repository.put(b'00000000000000000000000000000001', max_data + b'x'))
+

 class LocalRepositoryTestCase(RepositoryTestCaseBase):
     # test case that doesn't work with remote repositories