remove --compression-from

Marian Beermann 2017-04-03 21:48:06 +02:00
parent 929f2760dd
commit 69fb9bd403
6 changed files with 14 additions and 138 deletions

src/borg/archive.py

@@ -36,7 +36,6 @@ from .helpers import bin_to_hex
 from .helpers import safe_ns
 from .helpers import ellipsis_truncate, ProgressIndicatorPercent, log_multi
 from .helpers import PathPrefixPattern, FnmatchPattern
-from .helpers import CompressionDecider
 from .item import Item, ArchiveItem
 from .key import key_factory
 from .platform import acl_get, acl_set, set_flags, get_flags, swidth
@@ -278,7 +277,7 @@ class Archive:
     def __init__(self, repository, key, manifest, name, cache=None, create=False,
                  checkpoint_interval=300, numeric_owner=False, noatime=False, noctime=False, progress=False,
-                 chunker_params=CHUNKER_PARAMS, start=None, start_monotonic=None, end=None, compression=None, compression_files=None,
+                 chunker_params=CHUNKER_PARAMS, start=None, start_monotonic=None, end=None,
                  consider_part_files=False, log_json=False):
         self.cwd = os.getcwd()
         self.key = key
@@ -307,11 +306,8 @@ class Archive:
         self.pipeline = DownloadPipeline(self.repository, self.key)
         self.create = create
         if self.create:
-            self.file_compression_logger = create_logger('borg.debug.file-compression')
             self.items_buffer = CacheChunkBuffer(self.cache, self.key, self.stats)
             self.chunker = Chunker(self.key.chunk_seed, *chunker_params)
-            self.compression_decider = CompressionDecider(compression or CompressionSpec('none'),
-                                                          compression_files or [])
             if name in manifest.archives:
                 raise self.AlreadyExists(name)
             self.last_checkpoint = time.monotonic()
@@ -970,12 +966,10 @@ Utilization of max. archive size: {csize_max:.0%}
         if chunks is not None:
             item.chunks = chunks
         else:
-            compressor = self.compression_decider.decide(path)
-            self.file_compression_logger.debug('%s -> compression %s', path, compressor.name)
             with backup_io('open'):
                 fh = Archive._open_rb(path)
             with os.fdopen(fh, 'rb') as fd:
-                self.chunk_file(item, cache, self.stats, backup_io_iter(self.chunker.chunkify(fd, fh)), compressor=compressor)
+                self.chunk_file(item, cache, self.stats, backup_io_iter(self.chunker.chunkify(fd, fh)))
         if not is_special_file:
             # we must not memorize special files, because the contents of e.g. a
             # block or char device will change without its mtime/size/inode changing.
@@ -1561,7 +1555,7 @@ class ArchiveRecreater:
     def __init__(self, repository, manifest, key, cache, matcher,
                  exclude_caches=False, exclude_if_present=None, keep_exclude_tags=False,
-                 chunker_params=None, compression=None, compression_files=None, always_recompress=False,
+                 chunker_params=None, compression=None, always_recompress=False,
                  dry_run=False, stats=False, progress=False, file_status_printer=None,
                  checkpoint_interval=1800):
         self.repository = repository
@@ -1582,8 +1576,6 @@ class ArchiveRecreater:
         self.always_recompress = always_recompress
         self.compression = compression or CompressionSpec('none')
         self.seen_chunks = set()
-        self.compression_decider = CompressionDecider(compression or CompressionSpec('none'),
-                                                      compression_files or [])
         self.dry_run = dry_run
         self.stats = stats
@@ -1652,11 +1644,10 @@ class ArchiveRecreater:
             self.cache.chunk_incref(chunk_id, target.stats)
             return item.chunks
         chunk_iterator = self.iter_chunks(archive, target, list(item.chunks))
-        compressor = self.compression_decider.decide(item.path)
-        chunk_processor = partial(self.chunk_processor, target, compressor)
+        chunk_processor = partial(self.chunk_processor, target)
         target.chunk_file(item, self.cache, target.stats, chunk_iterator, chunk_processor)

-    def chunk_processor(self, target, compressor, data):
+    def chunk_processor(self, target, data):
         chunk_id = self.key.id_hash(data)
         if chunk_id in self.seen_chunks:
             return self.cache.chunk_incref(chunk_id, target.stats)
@@ -1664,10 +1655,10 @@ class ArchiveRecreater:
         if self.recompress and not self.always_recompress and chunk_id in self.cache.chunks:
             # Check if this chunk is already compressed the way we want it
             old_chunk = self.key.decrypt(None, self.repository.get(chunk_id), decompress=False)
-            if Compressor.detect(old_chunk.data).name == compressor.decide(data).name:
+            if Compressor.detect(old_chunk.data).name == self.key.compressor.decide(data).name:
                 # Stored chunk has the same compression we wanted
                 overwrite = False
-        chunk = Chunk(data, compressor=compressor)
+        chunk = Chunk(data)
         chunk_entry = self.cache.add_chunk(chunk_id, chunk, target.stats, overwrite=overwrite, wait=False)
         self.cache.repository.async_response(wait=False)
         self.seen_chunks.add(chunk_entry.id)
@@ -1753,7 +1744,7 @@ class ArchiveRecreater:
     def create_target_archive(self, name):
         target = Archive(self.repository, self.key, self.manifest, name, create=True,
                          progress=self.progress, chunker_params=self.chunker_params, cache=self.cache,
-                         checkpoint_interval=self.checkpoint_interval, compression=self.compression)
+                         checkpoint_interval=self.checkpoint_interval)
         return target

     def open_archive(self, name, **kwargs):
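For context on the recreate path above: a stored chunk is left alone when the compressor detected on its stored bytes matches what the wanted compressor would pick for the same data. A minimal, self-contained sketch of that comparison; StubCompressor, detect_name and needs_recompression are illustrative stand-ins, not borg's actual classes, and the header check here only tells zlib apart from "none"::

    # Toy model of the recompress check in chunk_processor above; not borg code.
    import zlib

    class StubCompressor:
        def __init__(self, name):
            self.name = name

        def decide(self, data):
            # a plain compressor always picks itself; only "auto" chooses per chunk
            return self

    def detect_name(stored_blob):
        # borg sniffs a magic header on stored chunks; this toy only
        # distinguishes zlib (streams start with 0x78) from "none"
        return 'zlib' if stored_blob[:1] == b'\x78' else 'none'

    def needs_recompression(stored_blob, wanted, data):
        # mirrors: Compressor.detect(old_chunk.data).name == compressor.decide(data).name
        return detect_name(stored_blob) != wanted.decide(data).name

    stored = zlib.compress(b'hello world' * 100)
    assert not needs_recompression(stored, StubCompressor('zlib'), b'data')  # skip
    assert needs_recompression(stored, StubCompressor('none'), b'data')      # redo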

src/borg/archiver.py

@@ -481,7 +481,6 @@ class Archiver:
                               numeric_owner=args.numeric_owner, noatime=args.noatime, noctime=args.noctime,
                               progress=args.progress,
                               chunker_params=args.chunker_params, start=t0, start_monotonic=t0_monotonic,
-                              compression=args.compression, compression_files=args.compression_files,
                               log_json=args.log_json)
             create_inner(archive, cache)
         else:
@@ -1335,8 +1334,7 @@ class Archiver:
         recreater = ArchiveRecreater(repository, manifest, key, cache, matcher,
                                      exclude_caches=args.exclude_caches, exclude_if_present=args.exclude_if_present,
                                      keep_exclude_tags=args.keep_exclude_tags, chunker_params=args.chunker_params,
-                                     compression=args.compression, compression_files=args.compression_files,
-                                     always_recompress=args.always_recompress,
+                                     compression=args.compression, always_recompress=args.always_recompress,
                                      progress=args.progress, stats=args.stats,
                                      file_status_printer=self.print_file_status,
                                      checkpoint_interval=args.checkpoint_interval,
@@ -1799,43 +1797,13 @@ class Archiver:
 For compressible data, it uses the given C[,L] compression - with C[,L]
 being any valid compression specifier.

-The decision about which compression to use is done by borg like this:
-
-1. find a compression specifier (per file):
-   match the path/filename against all patterns in all --compression-from
-   files (if any). If a pattern matches, use the compression spec given for
-   that pattern. If no pattern matches (and also if you do not give any
-   --compression-from option), default to the compression spec given by
-   --compression. See docs/misc/compression.conf for an example config.
-
-2. if the found compression spec is not "auto", the decision is taken:
-   use the found compression spec.
-
-3. if the found compression spec is "auto", test compressibility of each
-   chunk using lz4.
-   If it is compressible, use the C,[L] compression spec given within the
-   "auto" specifier. If it is not compressible, use no compression.
-
 Examples::

     borg create --compression lz4 REPO::ARCHIVE data
     borg create --compression zlib REPO::ARCHIVE data
     borg create --compression zlib,1 REPO::ARCHIVE data
     borg create --compression auto,lzma,6 REPO::ARCHIVE data
-    borg create --compression-from compression.conf --compression auto,lzma ...
-
-compression.conf has entries like::
-
-    # example config file for --compression-from option
-    #
-    # Format of non-comment / non-empty lines:
-    # <compression-spec>:<path/filename pattern>
-    # compression-spec is same format as for --compression option
-    # path/filename pattern is same format as for --exclude option
-    none:*.gz
-    none:*.zip
-    none:*.mp3
-    none:*.ogg
+    borg create --compression auto,lzma ...

 General remarks:
@@ -2424,11 +2392,6 @@ class Archiver:
                                    type=CompressionSpec, default=CompressionSpec('lz4'), metavar='COMPRESSION',
                                    help='select compression algorithm, see the output of the '
                                         '"borg help compression" command for details.')
-        archive_group.add_argument('--compression-from', dest='compression_files',
-                                   type=argparse.FileType('r'), action='append',
-                                   metavar='COMPRESSIONCONFIG',
-                                   help='read compression patterns from COMPRESSIONCONFIG, see the output of the '
-                                        '"borg help compression" command for details.')
         subparser.add_argument('location', metavar='ARCHIVE',
                                type=location_validator(archive=True),
@@ -2964,7 +2927,7 @@ class Archiver:
 resulting archive will only contain files from these PATHs.

 Note that all paths in an archive are relative, therefore absolute patterns/paths
-will *not* match (--exclude, --exclude-from, --compression-from, PATHs).
+will *not* match (--exclude, --exclude-from, PATHs).

 --compression: all chunks seen will be stored using the given method.
 Due to how Borg stores compressed size information this might display
@@ -3059,11 +3022,6 @@ class Archiver:
         archive_group.add_argument('--always-recompress', dest='always_recompress', action='store_true',
                                    help='always recompress chunks, don\'t skip chunks already compressed with the same '
                                         'algorithm.')
-        archive_group.add_argument('--compression-from', dest='compression_files',
-                                   type=argparse.FileType('r'), action='append',
-                                   metavar='COMPRESSIONCONFIG',
-                                   help='read compression patterns from COMPRESSIONCONFIG, see the output of the '
-                                        '"borg help compression" command for details.')
         archive_group.add_argument('--chunker-params', dest='chunker_params',
                                    type=ChunkerParams, default=CHUNKER_PARAMS,
                                    metavar='PARAMS',
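The removed help text described how "auto" probes each chunk with lz4 before committing to the configured algorithm. A rough sketch of that idea, using zlib level 1 as a stand-in for lz4 so it runs with the standard library alone; the 0.97 threshold is an assumption for illustration, not borg's tuned value::

    # Sketch of the "auto,C,L" heuristic from the help text above; not borg code.
    import lzma
    import zlib

    def auto_compress(data, real_compress):
        probe = zlib.compress(data, 1)       # cheap trial compression (lz4 in borg)
        if len(probe) < len(data) * 0.97:    # looks compressible enough
            return real_compress(data)       # use the configured C[,L]
        return data                          # incompressible: store uncompressed

    # e.g. --compression auto,lzma,6 would plug in an lzma preset-6 compressor:
    out = auto_compress(b'abc' * 1000, lambda d: lzma.compress(d, preset=6))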

src/borg/compress.pyx

@@ -5,18 +5,6 @@ borg.compress
 Compression is applied to chunks after ID hashing (so the ID is a direct function of the
 plain chunk, compression is irrelevant to it), and of course before encryption.

-Borg has a flexible scheme for deciding which compression to use for chunks.
-
-First, there is a global default set by the --compression command line option,
-which sets the .compressor attribute on the Key.
-
-For chunks that emanate from files CompressionDecider may set a specific
-Compressor based on patterns (this is the --compression-from option). This is stored
-as a Compressor instance in the "compressor" key in the Chunk's meta dictionary.
-
-When compressing (KeyBase.compress) either the Compressor specified in the Chunk's
-meta dictionary is used, or the default Compressor of the key.
-
 The "auto" mode (e.g. --compression auto,lzma,4) is implemented as a meta Compressor,
 meaning that Auto acts like a Compressor, but defers actual work to others (namely
 LZ4 as a heuristic whether compression is worth it, and the specified Compressor
 otherwise).
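The docstring change boils down to one dispatch rule. A simplified before/after, with names mirroring the diff; these helpers are a sketch, not the real classes::

    # Before: KeyBase.compress consulted the Chunk's meta dict first.
    def compress_old(key, chunk_meta, data):
        # per-file Compressor (set by CompressionDecider) beat the key default
        return chunk_meta.get('compressor', key.compressor).compress(data)

    # After: the key's compressor, set once from --compression, is the only source.
    def compress_new(key, data):
        return key.compressor.compress(data)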

src/borg/helpers.py

@@ -2096,39 +2096,6 @@ def clean_lines(lines, lstrip=None, rstrip=None, remove_empty=True, remove_comments=True):
         yield line

-class CompressionDecider:
-    def __init__(self, compression, compression_files):
-        """
-        Initialize a CompressionDecider instance (and read config files, if needed).
-
-        :param compression: default CompressionSpec (e.g. from --compression option)
-        :param compression_files: list of compression config files (e.g. from --compression-from) or
-                                  a list of other line iterators
-        """
-        from .compress import CompressionSpec
-        self.compressor = compression.compressor
-        if not compression_files:
-            self.matcher = None
-        else:
-            self.matcher = PatternMatcher(fallback=compression.compressor)
-            for file in compression_files:
-                try:
-                    for line in clean_lines(file):
-                        try:
-                            compr_spec, fn_pattern = line.split(':', 1)
-                        except:
-                            continue
-                        self.matcher.add([parse_pattern(fn_pattern)], CompressionSpec(compr_spec).compressor)
-                finally:
-                    if hasattr(file, 'close'):
-                        file.close()
-
-    def decide(self, path):
-        if self.matcher is not None:
-            return self.matcher.match(path)
-        return self.compressor
-
 class ErrorIgnoringTextIOWrapper(io.TextIOWrapper):
     def read(self, n):
         if not self.closed:

src/borg/key.py

@@ -152,10 +152,6 @@ class KeyBase:
         """Return HMAC hash using the "id" HMAC key
         """

-    def compress(self, chunk):
-        meta, data = chunk
-        return meta.get('compressor', self.compressor).compress(data)
-
     def encrypt(self, chunk):
         pass
@@ -256,7 +252,7 @@ class PlaintextKey(KeyBase):
         return sha256(data).digest()

     def encrypt(self, chunk):
-        data = self.compress(chunk)
+        data = self.compressor.compress(chunk.data)
         return b''.join([self.TYPE_STR, data])

     def decrypt(self, id, data, decompress=True):
@@ -334,7 +330,7 @@ class AESKeyBase(KeyBase):
     MAC = hmac_sha256

     def encrypt(self, chunk):
-        data = self.compress(chunk)
+        data = self.compressor.compress(chunk.data)
         self.nonce_manager.ensure_reservation(num_aes_blocks(len(data)))
         self.enc_cipher.reset()
         data = b''.join((self.enc_cipher.iv[8:], self.enc_cipher.encrypt(data)))
@@ -746,7 +742,7 @@ class AuthenticatedKey(ID_BLAKE2b_256, RepoKey):
     STORAGE = KeyBlobStorage.REPO

     def encrypt(self, chunk):
-        data = self.compress(chunk)
+        data = self.compressor.compress(chunk.data)
         return b''.join([self.TYPE_STR, data])

     def decrypt(self, id, data, decompress=True):
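With KeyBase.compress gone, every key type compresses the same way before framing. A minimal model of the resulting encrypt shape for the unencrypted key types; the type byte value here is illustrative and this is not borg's actual key class::

    # Toy version of the simplified encrypt path.
    import zlib

    class ToyKey:
        TYPE_STR = b'\x02'                   # illustrative key-type byte

        def __init__(self, compressor=None):
            # set once from --compression; no per-chunk override anymore
            self.compressor = compressor or zlib

        def encrypt(self, data):
            return b''.join([self.TYPE_STR, self.compressor.compress(data)])

    assert ToyKey().encrypt(b'chunk').startswith(b'\x02')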

src/borg/testsuite/helpers.py

@@ -12,7 +12,6 @@ import msgpack
 import msgpack.fallback

 from .. import platform
-from ..compress import CompressionSpec
 from ..helpers import Location
 from ..helpers import Buffer
 from ..helpers import partial_format, format_file_size, parse_file_size, format_timedelta, format_line, PlaceholderError, replace_placeholders
@@ -25,7 +24,6 @@ from ..helpers import StableDict, int_to_bigint, bigint_to_int, bin_to_hex
 from ..helpers import parse_timestamp, ChunkIteratorFileWrapper, ChunkerParams, Chunk
 from ..helpers import ProgressIndicatorPercent, ProgressIndicatorEndless
 from ..helpers import load_exclude_file, load_pattern_file
-from ..helpers import CompressionDecider
 from ..helpers import parse_pattern, PatternMatcher
 from ..helpers import PathFullPattern, PathPrefixPattern, FnmatchPattern, ShellPattern, RegexPattern
 from ..helpers import swidth_slice
@@ -1202,28 +1200,6 @@ data2
     assert list(clean_lines(conf, remove_comments=False)) == ['#comment', 'data1 #data1', 'data2', 'data3', ]

-def test_compression_decider():
-    default = CompressionSpec('zlib')
-    conf = """
-# use super-fast lz4 compression on huge VM files in this path:
-lz4:/srv/vm_disks
-# jpeg or zip files do not compress:
-none:*.jpeg
-none:*.zip
-""".splitlines()
-
-    cd = CompressionDecider(default, [])  # no conf, always use default
-    assert cd.decide('/srv/vm_disks/linux').name == 'zlib'
-    assert cd.decide('test.zip').name == 'zlib'
-    assert cd.decide('test').name == 'zlib'
-
-    cd = CompressionDecider(default, [conf, ])
-    assert cd.decide('/srv/vm_disks/linux').name == 'lz4'
-    assert cd.decide('test.zip').name == 'none'
-    assert cd.decide('test').name == 'zlib'  # no match in conf, use default
-
 def test_format_line():
     data = dict(foo='bar baz')
     assert format_line('', data) == ''