From 88798ae94954ed8fe52e57fa26554110d2369e07 Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Sun, 31 Jul 2016 23:09:57 +0200 Subject: [PATCH] recreate: --always-recompress, --compression-from what a mess --- src/borg/archive.py | 20 +++++++++++++++----- src/borg/archiver.py | 4 ++++ src/borg/compress.pyx | 2 ++ src/borg/helpers.py | 4 +++- 4 files changed, 24 insertions(+), 6 deletions(-) diff --git a/src/borg/archive.py b/src/borg/archive.py index 35ccff0a1..fc131d516 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -19,6 +19,7 @@ logger = create_logger() from . import xattr from .cache import ChunkListEntry from .chunker import Chunker +from .compress import Compressor from .constants import * # NOQA from .hashindex import ChunkIndex, ChunkIndexEntry from .helpers import Manifest @@ -1298,7 +1299,7 @@ class ArchiveRecreater: def __init__(self, repository, manifest, key, cache, matcher, exclude_caches=False, exclude_if_present=None, keep_tag_files=False, - chunker_params=None, compression=None, compression_files=None, + chunker_params=None, compression=None, compression_files=None, always_recompress=False, dry_run=False, stats=False, progress=False, file_status_printer=None): self.repository = repository self.key = key @@ -1312,10 +1313,11 @@ class ArchiveRecreater: self.chunker_params = chunker_params or CHUNKER_PARAMS self.recompress = bool(compression) + self.always_recompress = always_recompress self.compression = compression or CompressionSpec('none') self.seen_chunks = set() self.compression_decider1 = CompressionDecider1(compression or CompressionSpec('none'), - compression_files or []) + compression_files or []) key.compression_decider2 = CompressionDecider2(compression or CompressionSpec('none')) self.autocommit_threshold = max(self.AUTOCOMMIT_THRESHOLD, self.cache.chunks_stored_size() / 100) @@ -1404,7 +1406,6 @@ class ArchiveRecreater: def process_chunks(self, archive, target, item): """Return new chunk ID list for 'item'.""" - # 
TODO: support --compression-from if not self.recompress and not target.recreate_rechunkify: for chunk_id, size, csize in item.chunks: self.cache.chunk_incref(chunk_id, target.stats) @@ -1412,13 +1413,22 @@ class ArchiveRecreater: new_chunks = self.process_partial_chunks(target) chunk_iterator = self.create_chunk_iterator(archive, target, item) consume(chunk_iterator, len(new_chunks)) + compress = self.compression_decider1.decide(item.path) for chunk in chunk_iterator: + chunk.meta['compress'] = compress chunk_id = self.key.id_hash(chunk.data) if chunk_id in self.seen_chunks: new_chunks.append(self.cache.chunk_incref(chunk_id, target.stats)) else: - # TODO: detect / skip / --always-recompress - chunk_id, size, csize = self.cache.add_chunk(chunk_id, chunk, target.stats, overwrite=self.recompress) + compression_spec, chunk = self.key.compression_decider2.decide(chunk) + overwrite = self.recompress + if self.recompress and not self.always_recompress and chunk_id in self.cache.chunks: + # Check if this chunk is already compressed the way we want it + old_chunk = self.key.decrypt(None, self.repository.get(chunk_id), decompress=False) + if Compressor.detect(old_chunk.data).name == compression_spec['name']: + # Stored chunk has the same compression we wanted + overwrite = False + chunk_id, size, csize = self.cache.add_chunk(chunk_id, chunk, target.stats, overwrite=overwrite) new_chunks.append((chunk_id, size, csize)) self.seen_chunks.add(chunk_id) if self.recompress: diff --git a/src/borg/archiver.py b/src/borg/archiver.py index d58ebe455..b03b0a4c1 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -957,6 +957,7 @@ class Archiver: exclude_caches=args.exclude_caches, exclude_if_present=args.exclude_if_present, keep_tag_files=args.keep_tag_files, chunker_params=args.chunker_params, compression=args.compression, compression_files=args.compression_files, + always_recompress=args.always_recompress, progress=args.progress, stats=args.stats, 
file_status_printer=self.print_file_status, dry_run=args.dry_run) @@ -2098,6 +2099,9 @@ class Archiver: 'zlib,0 .. zlib,9 == zlib (with level 0..9),\n' 'lzma == lzma (default level 6),\n' 'lzma,0 .. lzma,9 == lzma (with level 0..9).') + archive_group.add_argument('--always-recompress', dest='always_recompress', action='store_true', + help='always recompress chunks, don\'t skip chunks already compressed with the same ' + 'algorithm.') archive_group.add_argument('--compression-from', dest='compression_files', type=argparse.FileType('r'), action='append', metavar='COMPRESSIONCONFIG', help='read compression patterns from COMPRESSIONCONFIG, one per line') diff --git a/src/borg/compress.pyx b/src/borg/compress.pyx index 6c42493f5..50785ea1a 100644 --- a/src/borg/compress.pyx +++ b/src/borg/compress.pyx @@ -6,6 +6,8 @@ except ImportError: from .helpers import Buffer +API_VERSION = 2 + cdef extern from "lz4.h": int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) nogil int LZ4_decompress_safe(const char* source, char* dest, int inputSize, int maxOutputSize) nogil diff --git a/src/borg/helpers.py b/src/borg/helpers.py index 2324d32a9..f4c553836 100644 --- a/src/borg/helpers.py +++ b/src/borg/helpers.py @@ -84,11 +84,13 @@ class PlaceholderError(Error): def check_extension_modules(): - from . import platform + from . import platform, compress if hashindex.API_VERSION != 3: raise ExtensionModuleError if chunker.API_VERSION != 2: raise ExtensionModuleError + if compress.API_VERSION != 2: + raise ExtensionModuleError if crypto.API_VERSION != 3: raise ExtensionModuleError if platform.API_VERSION != 3: