1
0
Fork 0
mirror of https://github.com/borgbackup/borg.git synced 2024-12-25 09:19:31 +00:00

borg create --chunker-params=...

This commit is contained in:
Thomas Waldmann 2015-06-20 01:20:46 +02:00
parent 6d0a00496a
commit 3b9b976f2a
3 changed files with 25 additions and 10 deletions

View file

@@ -21,10 +21,12 @@
Manifest, Statistics, decode_dict, st_mtime_ns, make_path_safe, StableDict, int_to_bigint, bigint_to_int Manifest, Statistics, decode_dict, st_mtime_ns, make_path_safe, StableDict, int_to_bigint, bigint_to_int
ITEMS_BUFFER = 1024 * 1024 ITEMS_BUFFER = 1024 * 1024
CHUNK_MIN = 1024 CHUNK_MIN = 1024
CHUNK_MAX = 10 * 1024 * 1024 CHUNK_MAX = 10 * 1024 * 1024
WINDOW_SIZE = 0xfff WINDOW_SIZE = 0xfff
CHUNK_MASK = 0xffff CHUNK_MASK = 0xffff
CHUNKER_PARAMS = (WINDOW_SIZE, CHUNK_MASK, CHUNK_MIN, CHUNK_MAX)
ZEROS = b'\0' * CHUNK_MAX ZEROS = b'\0' * CHUNK_MAX
@@ -69,12 +71,13 @@ def fetch_many(self, ids, is_preloaded=False):
class ChunkBuffer: class ChunkBuffer:
BUFFER_SIZE = 1 * 1024 * 1024 BUFFER_SIZE = 1 * 1024 * 1024
def __init__(self, key): def __init__(self, key, chunker_params=CHUNKER_PARAMS):
self.buffer = BytesIO() self.buffer = BytesIO()
self.packer = msgpack.Packer(unicode_errors='surrogateescape') self.packer = msgpack.Packer(unicode_errors='surrogateescape')
self.chunks = [] self.chunks = []
self.key = key self.key = key
self.chunker = Chunker(WINDOW_SIZE, CHUNK_MASK, CHUNK_MIN, CHUNK_MAX,self.key.chunk_seed) chunker_params += (self.key.chunk_seed, )
self.chunker = Chunker(*chunker_params)
def add(self, item): def add(self, item):
self.buffer.write(self.packer.pack(StableDict(item))) self.buffer.write(self.packer.pack(StableDict(item)))
@@ -104,8 +107,8 @@ def is_full(self):
class CacheChunkBuffer(ChunkBuffer): class CacheChunkBuffer(ChunkBuffer):
def __init__(self, cache, key, stats): def __init__(self, cache, key, stats, chunker_params=CHUNKER_PARAMS):
super(CacheChunkBuffer, self).__init__(key) super(CacheChunkBuffer, self).__init__(key, chunker_params)
self.cache = cache self.cache = cache
self.stats = stats self.stats = stats
@@ -127,7 +130,8 @@ class IncompatibleFilesystemEncodingError(Error):
def __init__(self, repository, key, manifest, name, cache=None, create=False, def __init__(self, repository, key, manifest, name, cache=None, create=False,
checkpoint_interval=300, numeric_owner=False, progress=False): checkpoint_interval=300, numeric_owner=False, progress=False,
chunker_params=CHUNKER_PARAMS):
self.cwd = os.getcwd() self.cwd = os.getcwd()
self.key = key self.key = key
self.repository = repository self.repository = repository
@@ -142,8 +146,9 @@ def __init__(self, repository, key, manifest, name, cache=None, create=False,
self.numeric_owner = numeric_owner self.numeric_owner = numeric_owner
self.pipeline = DownloadPipeline(self.repository, self.key) self.pipeline = DownloadPipeline(self.repository, self.key)
if create: if create:
self.items_buffer = CacheChunkBuffer(self.cache, self.key, self.stats) self.items_buffer = CacheChunkBuffer(self.cache, self.key, self.stats, chunker_params)
self.chunker = Chunker(WINDOW_SIZE, CHUNK_MASK, CHUNK_MIN, CHUNK_MAX, self.key.chunk_seed) chunker_params += (self.key.chunk_seed, )
self.chunker = Chunker(*chunker_params)
if name in manifest.archives: if name in manifest.archives:
raise self.AlreadyExists(name) raise self.AlreadyExists(name)
self.last_checkpoint = time.time() self.last_checkpoint = time.time()

View file

@@ -13,7 +13,7 @@
import traceback import traceback
from . import __version__ from . import __version__
from .archive import Archive, ArchiveChecker from .archive import Archive, ArchiveChecker, CHUNKER_PARAMS
from .repository import Repository from .repository import Repository
from .cache import Cache from .cache import Cache
from .key import key_creator from .key import key_creator
@@ -21,7 +21,7 @@
format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, timestamp, \ format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, timestamp, \
get_cache_dir, get_keys_dir, format_timedelta, prune_within, prune_split, \ get_cache_dir, get_keys_dir, format_timedelta, prune_within, prune_split, \
Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \ Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \
is_cachedir, bigint_to_int is_cachedir, bigint_to_int, ChunkerParams
from .remote import RepositoryServer, RemoteRepository from .remote import RepositoryServer, RemoteRepository
@@ -104,7 +104,8 @@ def do_create(self, args):
cache = Cache(repository, key, manifest, do_files=args.cache_files) cache = Cache(repository, key, manifest, do_files=args.cache_files)
archive = Archive(repository, key, manifest, args.archive.archive, cache=cache, archive = Archive(repository, key, manifest, args.archive.archive, cache=cache,
create=True, checkpoint_interval=args.checkpoint_interval, create=True, checkpoint_interval=args.checkpoint_interval,
numeric_owner=args.numeric_owner, progress=args.progress) numeric_owner=args.numeric_owner, progress=args.progress,
chunker_params=args.chunker_params)
# Add cache dir to inode_skip list # Add cache dir to inode_skip list
skip_inodes = set() skip_inodes = set()
try: try:
@@ -625,6 +626,10 @@ def run(self, args=None):
metavar='yyyy-mm-ddThh:mm:ss', metavar='yyyy-mm-ddThh:mm:ss',
help='manually specify the archive creation date/time (UTC). ' help='manually specify the archive creation date/time (UTC). '
'alternatively, give a reference file/directory.') 'alternatively, give a reference file/directory.')
subparser.add_argument('--chunker-params', dest='chunker_params',
type=ChunkerParams, default=CHUNKER_PARAMS,
metavar='WINDOW_SIZE,CHUNK_MASK,CHUNK_MIN,CHUNK_MAX',
help='specify the chunker parameters. default: %r' % (CHUNKER_PARAMS, ))
subparser.add_argument('archive', metavar='ARCHIVE', subparser.add_argument('archive', metavar='ARCHIVE',
type=location_validator(archive=True), type=location_validator(archive=True),
help='archive to create') help='archive to create')

View file

@@ -313,6 +313,11 @@ def timestamp(s):
raise ValueError raise ValueError
def ChunkerParams(s):
    """Parse a ``--chunker-params`` command line value into a 4-tuple of ints.

    The text must consist of exactly four comma-separated integer fields,
    in the order WINDOW_SIZE,CHUNK_MASK,CHUNK_MIN,CHUNK_MAX.

    :param s: the raw option text, e.g. ``"4095,65535,1024,10485760"``
    :return: ``(window_size, chunk_mask, chunk_min, chunk_max)`` as ints
    :raises ValueError: if there are not exactly four fields, or a field
        is not a valid integer literal
    """
    # Unpack first, so a wrong field count is rejected before any conversion.
    raw_window, raw_mask, raw_min, raw_max = s.split(',')
    return tuple(int(text) for text in (raw_window, raw_mask, raw_min, raw_max))
def is_cachedir(path): def is_cachedir(path):
"""Determines whether the specified path is a cache directory (and """Determines whether the specified path is a cache directory (and
therefore should potentially be excluded from the backup) according to therefore should potentially be excluded from the backup) according to