mirror of https://github.com/borgbackup/borg.git

commit 88798ae949 (parent 5433b1a1e4)

recreate: --always-recompress, --compression-from

what a mess
@@ -19,6 +19,7 @@ logger = create_logger()
 from . import xattr
 from .cache import ChunkListEntry
 from .chunker import Chunker
+from .compress import Compressor
 from .constants import * # NOQA
 from .hashindex import ChunkIndex, ChunkIndexEntry
 from .helpers import Manifest
@@ -1298,7 +1299,7 @@ class ArchiveRecreater:

     def __init__(self, repository, manifest, key, cache, matcher,
                  exclude_caches=False, exclude_if_present=None, keep_tag_files=False,
-                 chunker_params=None, compression=None, compression_files=None,
+                 chunker_params=None, compression=None, compression_files=None, always_recompress=False,
                  dry_run=False, stats=False, progress=False, file_status_printer=None):
         self.repository = repository
         self.key = key
@@ -1312,6 +1313,7 @@ class ArchiveRecreater:

         self.chunker_params = chunker_params or CHUNKER_PARAMS
         self.recompress = bool(compression)
+        self.always_recompress = always_recompress
         self.compression = compression or CompressionSpec('none')
         self.seen_chunks = set()
         self.compression_decider1 = CompressionDecider1(compression or CompressionSpec('none'),
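
Taken together, these hunks give ArchiveRecreater three modes: with no compression spec, chunks are reused untouched; with a spec, a chunk is recompressed only when its stored algorithm differs from the requested one; with --always-recompress, every matching chunk is rewritten. A minimal sketch of that decision, where `stored_algo` and `wanted_algo` are hypothetical stand-ins for `Compressor.detect(...).name` and `compression_spec['name']` (the real code additionally probes a chunk only if it is already present in the cache):

    # Sketch only; `stored_algo` / `wanted_algo` stand in for values the
    # real code derives from Compressor.detect() and the compression spec.
    def needs_rewrite(recompress, always_recompress, stored_algo, wanted_algo):
        if not recompress:
            return False  # no --compression given: reuse chunk, just bump refcount
        if always_recompress:
            return True   # --always-recompress: rewrite unconditionally
        return stored_algo != wanted_algo  # skip when compression already matches
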
@@ -1404,7 +1406,6 @@ class ArchiveRecreater:

     def process_chunks(self, archive, target, item):
         """Return new chunk ID list for 'item'."""
-        # TODO: support --compression-from
         if not self.recompress and not target.recreate_rechunkify:
             for chunk_id, size, csize in item.chunks:
                 self.cache.chunk_incref(chunk_id, target.stats)
@@ -1412,13 +1413,22 @@ class ArchiveRecreater:
         new_chunks = self.process_partial_chunks(target)
         chunk_iterator = self.create_chunk_iterator(archive, target, item)
         consume(chunk_iterator, len(new_chunks))
+        compress = self.compression_decider1.decide(item.path)
         for chunk in chunk_iterator:
+            chunk.meta['compress'] = compress
             chunk_id = self.key.id_hash(chunk.data)
             if chunk_id in self.seen_chunks:
                 new_chunks.append(self.cache.chunk_incref(chunk_id, target.stats))
             else:
-                # TODO: detect / skip / --always-recompress
-                chunk_id, size, csize = self.cache.add_chunk(chunk_id, chunk, target.stats, overwrite=self.recompress)
+                compression_spec, chunk = self.key.compression_decider2.decide(chunk)
+                overwrite = self.recompress
+                if self.recompress and not self.always_recompress and chunk_id in self.cache.chunks:
+                    # Check if this chunk is already compressed the way we want it
+                    old_chunk = self.key.decrypt(None, self.repository.get(chunk_id), decompress=False)
+                    if Compressor.detect(old_chunk.data).name == compression_spec['name']:
+                        # Stored chunk has the same compression we wanted
+                        overwrite = False
+                chunk_id, size, csize = self.cache.add_chunk(chunk_id, chunk, target.stats, overwrite=overwrite)
                 new_chunks.append((chunk_id, size, csize))
                 self.seen_chunks.add(chunk_id)
                 if self.recompress:
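
The skip test above works because the stored chunk can be fetched and decrypted without decompressing it (decompress=False), and the algorithm can then be identified from the compressed bytes themselves. A self-contained sketch of that style of magic-byte detection; the two-byte IDs and the headerless-zlib fallback are illustrative assumptions, not necessarily borg's exact on-disk format:

    # Illustrative magic-byte table; the IDs below are assumptions, not borg's canon.
    MAGIC_TO_NAME = {
        b'\x00\x00': 'none',
        b'\x01\x00': 'lz4',
        b'\x02\x00': 'lzma',
    }

    def detect_compression(data: bytes) -> str:
        # Assumption: zlib data carries no magic prefix, so it is the fallback.
        return MAGIC_TO_NAME.get(data[:2], 'zlib')

If the detected name equals the one the compression decider chose, add_chunk() is called with overwrite=False and only the reference count changes, so unchanged chunks are never rewritten.
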
@@ -957,6 +957,7 @@ class Archiver:
                                      exclude_caches=args.exclude_caches, exclude_if_present=args.exclude_if_present,
                                      keep_tag_files=args.keep_tag_files, chunker_params=args.chunker_params,
                                      compression=args.compression, compression_files=args.compression_files,
+                                     always_recompress=args.always_recompress,
                                      progress=args.progress, stats=args.stats,
                                      file_status_printer=self.print_file_status,
                                      dry_run=args.dry_run)
@@ -2098,6 +2099,9 @@ class Archiver:
                                         'zlib,0 .. zlib,9 == zlib (with level 0..9),\n'
                                         'lzma == lzma (default level 6),\n'
                                         'lzma,0 .. lzma,9 == lzma (with level 0..9).')
+        archive_group.add_argument('--always-recompress', dest='always_recompress', action='store_true',
+                                   help='always recompress chunks, don\'t skip chunks already compressed with the same '
+                                        'algorithm.')
         archive_group.add_argument('--compression-from', dest='compression_files',
                                    type=argparse.FileType('r'), action='append',
                                    metavar='COMPRESSIONCONFIG', help='read compression patterns from COMPRESSIONCONFIG, one per line')
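
For reference, the two options above follow standard argparse patterns: a store_true boolean flag, and an appendable FileType('r') argument that collects one open file handle per occurrence. A runnable sketch of the same pattern outside of borg's parser plumbing (parser name and sample invocation are made up for illustration):

    import argparse

    parser = argparse.ArgumentParser(prog='recreate-sketch')
    parser.add_argument('--always-recompress', dest='always_recompress',
                        action='store_true',
                        help='always recompress chunks, do not skip chunks already '
                             'compressed with the same algorithm')
    parser.add_argument('--compression-from', dest='compression_files',
                        type=argparse.FileType('r'), action='append',
                        metavar='COMPRESSIONCONFIG',
                        help='read compression patterns from COMPRESSIONCONFIG, one per line')

    args = parser.parse_args(['--always-recompress'])
    assert args.always_recompress and args.compression_files is None
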
@@ -6,6 +6,8 @@ except ImportError:

 from .helpers import Buffer

+API_VERSION = 2
+
 cdef extern from "lz4.h":
     int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) nogil
     int LZ4_decompress_safe(const char* source, char* dest, int inputSize, int maxOutputSize) nogil
@@ -84,11 +84,13 @@ class PlaceholderError(Error):


 def check_extension_modules():
-    from . import platform
+    from . import platform, compress
     if hashindex.API_VERSION != 3:
         raise ExtensionModuleError
     if chunker.API_VERSION != 2:
         raise ExtensionModuleError
+    if compress.API_VERSION != 2:
+        raise ExtensionModuleError
     if crypto.API_VERSION != 3:
         raise ExtensionModuleError
     if platform.API_VERSION != 3:
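
The last two hunks set up borg's usual handshake for compiled modules: compress.pyx now exports API_VERSION = 2, and check_extension_modules() verifies it alongside the other extensions so a stale binary fails fast at startup. A minimal sketch of the pattern, with a hypothetical `modules` mapping standing in for the imported extensions:

    class ExtensionModuleError(Exception):
        """A compiled extension does not match the API version the Python side expects."""

    EXPECTED_API = {'hashindex': 3, 'chunker': 2, 'compress': 2, 'crypto': 3, 'platform': 3}

    def check_api_versions(modules):
        # `modules` maps extension names to imported module objects (hypothetical helper).
        for name, module in modules.items():
            if getattr(module, 'API_VERSION', None) != EXPECTED_API[name]:
                raise ExtensionModuleError(name)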