ObfuscateSize compressor
parent a2487fcdcc, commit b45874bead
6 changed files with 192 additions and 4 deletions

@@ -59,6 +59,10 @@ Main features
All data can be protected using 256-bit AES encryption, data integrity and
authenticity are verified using HMAC-SHA256. Data is encrypted clientside.

**Obfuscation**
    Optionally, borg can actively obfuscate e.g. the size of files / chunks to
    make fingerprinting attacks more difficult.

**Compression**
    All data can be optionally compressed:

@@ -426,6 +426,27 @@ he assumes that the victim also possesses (and backups into the repository)
could try a brute force fingerprinting attack based on the chunk sizes in the
repository to prove his assumption.

To make this more difficult, borg has an ``obfuscate`` pseudo compressor that
takes the output of the normal compression step and tries to obfuscate the size
of that output. Of course, it can only **add** to the size, not reduce it. Thus,
the optional usage of this mechanism comes at a cost: it will make your
repository larger (ranging from a few percent larger [cheap] to ridiculously
larger [expensive], depending on the algorithm/params you wisely choose).

The output of the compressed-size obfuscation step will then be encrypted and
authenticated, as usual. Of course, using that obfuscation would not make any
sense without encryption. Thus, the additional data added by the obfuscator is
just 0x00 bytes, which is good enough because after encryption it will look
random anyway.
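
Conceptually, the write path then looks like this (a minimal sketch, not borg's
actual code; ``compressor`` and ``cipher`` are stand-ins for borg's real
compression and encryption layers)::

    import random

    def store_obfuscated(data, compressor, cipher, max_pad=1024):
        compressed = compressor.compress(data)
        pad_len = int(max_pad * random.random())   # obfuscation can only add bytes
        padded = compressed + bytes(pad_len)       # the added bytes are just 0x00
        return cipher.encrypt(padded)              # after encryption, the padding looks random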

To summarize, the following measures make size-based fingerprinting difficult:

- user-selectable chunker algorithm (and parametrization)
- for the buzhash chunker: secret, random per-repo chunker seed
- user-selectable compression algorithm (and level)
- optional ``obfuscate`` pseudo compressor with different choices
  of algorithm and parameters

Stored chunk proximity
----------------------

@@ -2392,6 +2392,32 @@ def do_break_lock(self, args, repository):
    For compressible data, it uses the given C[,L] compression - with C[,L]
    being any valid compression specifier.

obfuscate,SPEC,C[,L]
    Use compressed-size obfuscation to make fingerprinting attacks based on
    the observable stored chunk size more difficult.
    Note:
    - you must combine this with encryption or it won't make any sense.
    - your repo size will be bigger, of course.

    The SPEC value determines how the size obfuscation works:

    Relative random reciprocal size variation:
        Size will increase by a factor, relative to the compressed data size.
        Smaller factors are often used, larger factors rarely.
        1: factor 0.01 .. 100.0
        2: factor 0.1 .. 1000.0
        3: factor 1.0 .. 10000.0
        4: factor 10.0 .. 100000.0
        5: factor 100.0 .. 1000000.0
        6: factor 1000.0 .. 10000000.0

    Add a randomly sized padding up to the given size:
        110: 1kiB
        ...
        120: 1MiB
        ...
        123: 8MiB (max.)
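
For illustration, the extra bytes added for a given SPEC can be estimated with a
small sketch that mirrors the implementation in ``compress.pyx`` below (the
function name here is illustrative, not part of borg's API)::

    import random

    def obfuscation_extra_bytes(spec, compr_size):
        if 1 <= spec <= 6:
            factor = 0.001 * 10 ** spec          # SPEC 1 -> 0.01 ... SPEC 6 -> 1000.0
            r = max(0.0001, random.random())     # reciprocal draw, bounded away from 0
            return int(compr_size * factor / r)  # e.g. SPEC 1 yields factor 0.01 .. 100.0
        if 110 <= spec <= 123:
            max_padding = 2 ** (spec - 100)      # 1 kiB (110) .. 8 MiB (123)
            return int(max_padding * random.random())
        raise ValueError('invalid SPEC')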

Examples::

    borg create --compression lz4 REPO::ARCHIVE data

@@ -2400,7 +2426,10 @@ def do_break_lock(self, args, repository):
    borg create --compression zlib REPO::ARCHIVE data
    borg create --compression zlib,1 REPO::ARCHIVE data
    borg create --compression auto,lzma,6 REPO::ARCHIVE data
-   borg create --compression auto,lzma ...\n\n''')
+   borg create --compression auto,lzma ...
+   borg create --compression obfuscate,3,none ...
+   borg create --compression obfuscate,3,auto,zstd,10 ...
+   borg create --compression obfuscate,2,zstd,6 ...\n\n''')

    def do_help(self, parser, commands, args):
        if not args.topic:

@@ -15,6 +15,8 @@ which compressor has been used to compress the data and dispatch to the correct
decompressor.
"""

import random
from struct import Struct
import zlib

try:

@@ -23,9 +25,10 @@ except ImportError:
    lzma = None


from .constants import MAX_DATA_SIZE
from .helpers import Buffer, DecompressionError

-API_VERSION = '1.2_01'
+API_VERSION = '1.2_02'

cdef extern from "algorithms/lz4-libselect.h":
    int LZ4_compress_default(const char* source, char* dest, int inputSize, int maxOutputSize) nogil

@@ -433,6 +436,69 @@ class Auto(CompressorBase):
        raise NotImplementedError


class ObfuscateSize(CompressorBase):
    """
    Meta-Compressor that obfuscates the compressed data size.
    """
    ID = b'\x04\x00'
    name = 'obfuscate'

    header_fmt = Struct('>I')
    header_len = len(header_fmt.pack(0))

    def __init__(self, level=None, compressor=None):
        super().__init__()
        self.compressor = compressor
        if level is None:
            pass  # decompression
        elif 1 <= level <= 6:
            self._obfuscate = self._relative_random_reciprocal_obfuscate
            self.factor = 0.001 * 10 ** level
            self.min_r = 0.0001
        elif 110 <= level <= 123:
            self._obfuscate = self._random_padding_obfuscate
            self.max_padding_size = 2 ** (level - 100)  # 1kiB .. 8MiB

    def _obfuscate(self, compr_size):
        # implementations need to return the size of the obfuscation data
        # that the caller shall add.
        raise NotImplementedError

    def _relative_random_reciprocal_obfuscate(self, compr_size):
        # effect for SPEC 1:
        # f = 0.01 .. 0.1 for r in 1.0 .. 0.1 == in 90% of cases
        # f = 0.1 .. 1.0 for r in 0.1 .. 0.01 == in 9% of cases
        # f = 1.0 .. 10.0 for r in 0.01 .. 0.001 == in 0.9% of cases
        # f = 10.0 .. 100.0 for r in 0.001 .. 0.0001 == in 0.09% of cases
        r = max(self.min_r, random.random())  # 0..1, but don't get too close to 0
        f = self.factor / r
        return int(compr_size * f)

    def _random_padding_obfuscate(self, compr_size):
        return int(self.max_padding_size * random.random())

    def compress(self, data):
        compressed_data = self.compressor.compress(data)  # compress data
        compr_size = len(compressed_data)
        header = self.header_fmt.pack(compr_size)
        addtl_size = self._obfuscate(compr_size)
        addtl_size = max(0, addtl_size)  # we can only make it longer, not shorter!
        addtl_size = min(MAX_DATA_SIZE - 1024 - compr_size, addtl_size)  # stay away from MAX_DATA_SIZE
        trailer = bytes(addtl_size)
        obfuscated_data = b''.join([header, compressed_data, trailer])
        return super().compress(obfuscated_data)  # add ID header

    def decompress(self, data):
        if not isinstance(data, memoryview):
            data = memoryview(data)
        obfuscated_data = super().decompress(data)  # remove obfuscator ID header
        compr_size = self.header_fmt.unpack(obfuscated_data[0:self.header_len])[0]
        compressed_data = obfuscated_data[self.header_len:self.header_len + compr_size]
        if self.compressor is None:
            self.compressor = Compressor.detect(compressed_data)()
        return self.compressor.decompress(compressed_data)  # decompress data


# Maps valid compressor names to their class
COMPRESSOR_TABLE = {
    CNONE.name: CNONE,

@@ -441,9 +507,10 @@ COMPRESSOR_TABLE = {
    LZMA.name: LZMA,
    Auto.name: Auto,
    ZSTD.name: ZSTD,
    ObfuscateSize.name: ObfuscateSize,
}
# List of possible compression types. Does not include Auto, since it is a meta-Compressor.
-COMPRESSOR_LIST = [LZ4, ZSTD, CNONE, ZLIB, LZMA, ]  # check fast stuff first
+COMPRESSOR_LIST = [LZ4, ZSTD, CNONE, ZLIB, LZMA, ObfuscateSize, ]  # check fast stuff first


def get_compressor(name, **kwargs):
    cls = COMPRESSOR_TABLE[name]

@@ -515,6 +582,16 @@ class CompressionSpec:
            else:
                raise ValueError
            self.inner = CompressionSpec(compression)
        elif self.name == 'obfuscate':
            if 3 <= count <= 5:
                level = int(values[1])
                if not ((1 <= level <= 6) or (110 <= level <= 123)):
                    raise ValueError
                self.level = level
                compression = ','.join(values[2:])
            else:
                raise ValueError
            self.inner = CompressionSpec(compression)
        else:
            raise ValueError

@@ -526,3 +603,5 @@ class CompressionSpec:
            return get_compressor(self.name, level=self.level)
        elif self.name == 'auto':
            return get_compressor(self.name, compressor=self.inner.compressor)
        elif self.name == 'obfuscate':
            return get_compressor(self.name, level=self.level, compressor=self.inner.compressor)
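
Taken together, the chunk layout written by ObfuscateSize.compress above can be
inspected with a quick sanity-check sketch (assuming the module is importable as
borg.compress; the 'none' inner compressor keeps the data uncompressed):

    from struct import Struct
    from borg.compress import get_compressor

    c = get_compressor('obfuscate', level=110, compressor=get_compressor('none'))
    blob = c.compress(bytes(100))
    assert blob[:2] == b'\x04\x00'                 # obfuscator ID header
    inner_len = Struct('>I').unpack(blob[2:6])[0]  # size of the inner compressed data
    padding = blob[6 + inner_len:]                 # zero bytes, up to 1 kiB for SPEC 110
    assert padding == bytes(len(padding))
    assert c.decompress(blob) == bytes(100)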

@@ -29,7 +29,7 @@ def check_extension_modules():
        raise ExtensionModuleError
    if chunker.API_VERSION != '1.2_01':
        raise ExtensionModuleError
-   if compress.API_VERSION != '1.2_01':
+   if compress.API_VERSION != '1.2_02':
        raise ExtensionModuleError
    if borg.crypto.low_level.API_VERSION != '1.2_01':
        raise ExtensionModuleError

@@ -140,6 +140,61 @@ def test_auto():
    assert Compressor.detect(compressed) == CNONE


def test_obfuscate():
    compressor = CompressionSpec('obfuscate,1,none').compressor
    data = bytes(10000)
    compressed = compressor.compress(data)
    # 2 id bytes compression, 2 id bytes obfuscator, 4 length bytes
    assert len(data) + 8 <= len(compressed) <= len(data) * 101 + 8
    # compressing 100 times the same data should give at least 50 different result sizes
    assert len(set(len(compressor.compress(data)) for i in range(100))) > 50

    cs = CompressionSpec('obfuscate,2,lz4')
    assert isinstance(cs.inner.compressor, LZ4)
    compressor = cs.compressor
    data = bytes(10000)
    compressed = compressor.compress(data)
    # 2 id bytes compression, 2 id bytes obfuscator, 4 length bytes
    min_compress, max_compress = 0.2, 0.001  # estimate compression factor outer boundaries
    assert max_compress * len(data) + 8 <= len(compressed) <= min_compress * len(data) * 1001 + 8
    # compressing 100 times the same data should give multiple different result sizes
    assert len(set(len(compressor.compress(data)) for i in range(100))) > 10

    cs = CompressionSpec('obfuscate,6,zstd,3')
    assert isinstance(cs.inner.compressor, ZSTD)
    compressor = cs.compressor
    data = bytes(10000)
    compressed = compressor.compress(data)
    # 2 id bytes compression, 2 id bytes obfuscator, 4 length bytes
    min_compress, max_compress = 0.2, 0.001  # estimate compression factor outer boundaries
    assert max_compress * len(data) + 8 <= len(compressed) <= min_compress * len(data) * 10000001 + 8
    # compressing 100 times the same data should give multiple different result sizes
    assert len(set(len(compressor.compress(data)) for i in range(100))) > 90

    cs = CompressionSpec('obfuscate,2,auto,zstd,10')
    assert isinstance(cs.inner.compressor, Auto)
    compressor = cs.compressor
    data = bytes(10000)
    compressed = compressor.compress(data)
    # 2 id bytes compression, 2 id bytes obfuscator, 4 length bytes
    min_compress, max_compress = 0.2, 0.001  # estimate compression factor outer boundaries
    assert max_compress * len(data) + 8 <= len(compressed) <= min_compress * len(data) * 1001 + 8
    # compressing 100 times the same data should give multiple different result sizes
    assert len(set(len(compressor.compress(data)) for i in range(100))) > 10

    cs = CompressionSpec('obfuscate,110,none')
    assert isinstance(cs.inner.compressor, CNONE)
    compressor = cs.compressor
    data = bytes(1000)
    compressed = compressor.compress(data)
    # N blocks + 2 id bytes obfuscator + 4 length bytes
    assert 1000 + 6 <= len(compressed) <= 1000 + 6 + 1024
    data = bytes(1100)
    compressed = compressor.compress(data)
    # N blocks + 2 id bytes obfuscator + 4 length bytes
    assert 1100 + 6 <= len(compressed) <= 1100 + 6 + 1024


def test_compression_specs():
    with pytest.raises(ValueError):
        CompressionSpec('')