diff --git a/borg/archive.py b/borg/archive.py index e6d557479..a053f79e9 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -21,10 +21,12 @@ from .helpers import parse_timestamp, Error, uid2user, user2uid, gid2group, grou Manifest, Statistics, decode_dict, st_mtime_ns, make_path_safe, StableDict, int_to_bigint, bigint_to_int ITEMS_BUFFER = 1024 * 1024 + CHUNK_MIN = 1024 CHUNK_MAX = 10 * 1024 * 1024 WINDOW_SIZE = 0xfff CHUNK_MASK = 0xffff +CHUNKER_PARAMS = (WINDOW_SIZE, CHUNK_MASK, CHUNK_MIN, CHUNK_MAX) ZEROS = b'\0' * CHUNK_MAX @@ -69,12 +71,13 @@ class DownloadPipeline: class ChunkBuffer: BUFFER_SIZE = 1 * 1024 * 1024 - def __init__(self, key): + def __init__(self, key, chunker_params=CHUNKER_PARAMS): self.buffer = BytesIO() self.packer = msgpack.Packer(unicode_errors='surrogateescape') self.chunks = [] self.key = key - self.chunker = Chunker(WINDOW_SIZE, CHUNK_MASK, CHUNK_MIN, CHUNK_MAX,self.key.chunk_seed) + chunker_params += (self.key.chunk_seed, ) + self.chunker = Chunker(*chunker_params) def add(self, item): self.buffer.write(self.packer.pack(StableDict(item))) @@ -104,8 +107,8 @@ class ChunkBuffer: class CacheChunkBuffer(ChunkBuffer): - def __init__(self, cache, key, stats): - super(CacheChunkBuffer, self).__init__(key) + def __init__(self, cache, key, stats, chunker_params=CHUNKER_PARAMS): + super(CacheChunkBuffer, self).__init__(key, chunker_params) self.cache = cache self.stats = stats @@ -127,7 +130,8 @@ class Archive: def __init__(self, repository, key, manifest, name, cache=None, create=False, - checkpoint_interval=300, numeric_owner=False, progress=False): + checkpoint_interval=300, numeric_owner=False, progress=False, + chunker_params=CHUNKER_PARAMS): self.cwd = os.getcwd() self.key = key self.repository = repository @@ -142,8 +146,9 @@ class Archive: self.numeric_owner = numeric_owner self.pipeline = DownloadPipeline(self.repository, self.key) if create: - self.items_buffer = CacheChunkBuffer(self.cache, self.key, self.stats) - self.chunker = Chunker(WINDOW_SIZE, CHUNK_MASK, CHUNK_MIN, CHUNK_MAX, self.key.chunk_seed) + self.items_buffer = CacheChunkBuffer(self.cache, self.key, self.stats, chunker_params) + chunker_params += (self.key.chunk_seed, ) + self.chunker = Chunker(*chunker_params) if name in manifest.archives: raise self.AlreadyExists(name) self.last_checkpoint = time.time() diff --git a/borg/archiver.py b/borg/archiver.py index 9d984d5cc..3c5ada4fe 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -13,7 +13,7 @@ import textwrap import traceback from . import __version__ -from .archive import Archive, ArchiveChecker +from .archive import Archive, ArchiveChecker, CHUNKER_PARAMS from .repository import Repository from .cache import Cache from .key import key_creator @@ -21,7 +21,7 @@ from .helpers import Error, location_validator, format_time, format_file_size, \ format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, timestamp, \ get_cache_dir, get_keys_dir, format_timedelta, prune_within, prune_split, \ Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \ - is_cachedir, bigint_to_int + is_cachedir, bigint_to_int, ChunkerParams from .remote import RepositoryServer, RemoteRepository @@ -104,7 +104,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") cache = Cache(repository, key, manifest, do_files=args.cache_files) archive = Archive(repository, key, manifest, args.archive.archive, cache=cache, create=True, checkpoint_interval=args.checkpoint_interval, - numeric_owner=args.numeric_owner, progress=args.progress) + numeric_owner=args.numeric_owner, progress=args.progress, + chunker_params=args.chunker_params) # Add cache dir to inode_skip list skip_inodes = set() try: @@ -625,6 +626,10 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") metavar='yyyy-mm-ddThh:mm:ss', help='manually specify the archive creation date/time (UTC). ' 'alternatively, give a reference file/directory.') + subparser.add_argument('--chunker-params', dest='chunker_params', + type=ChunkerParams, default=CHUNKER_PARAMS, + metavar='WINDOW_SIZE,CHUNK_MASK,CHUNK_MIN,CHUNK_MAX', + help='specify the chunker parameters. default: %r' % (CHUNKER_PARAMS, )) subparser.add_argument('archive', metavar='ARCHIVE', type=location_validator(archive=True), help='archive to create') diff --git a/borg/helpers.py b/borg/helpers.py index 0a3b84d2f..1f1612d3a 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -313,6 +313,11 @@ def timestamp(s): raise ValueError +def ChunkerParams(s): + window_size, chunk_mask, chunk_min, chunk_max = s.split(',') + return int(window_size), int(chunk_mask), int(chunk_min), int(chunk_max) + + def is_cachedir(path): """Determines whether the specified path is a cache directory (and therefore should potentially be excluded from the backup) according to