add --consider-checkpoint-files option, update FAQ

Author: Thomas Waldmann
Date:   2016-07-21 22:24:48 +02:00
parent 0ea6745250
commit e5bd6cef20
3 changed files with 31 additions and 24 deletions


@@ -246,27 +246,21 @@ Once your backup has finished successfully, you can delete all
 ``<archive-name>.checkpoint`` archives. If you run ``borg prune``, it will
 also care for deleting unneeded checkpoints.
 
+Note: the checkpointing mechanism creates hidden, partial files in an archive,
+so that checkpoints even work while a big file is being processed.
+They are named ``<filename>.checkpoint_<N>`` and all operations usually ignore
+these files, but you can make them considered by giving the option
+``--consider-checkpoint-files``. You usually only need that option if you are
+really desperate (e.g. if you have no completed backup of that file and you'd
+rather get a partial file extracted than nothing). You do **not** want to give
+that option under any normal circumstances.
+
 How can I backup huge file(s) over an unstable connection?
 ----------------------------------------------------------
 
-You can use this "split trick" as a workaround for the in-between-files-only
-checkpoints (see above), huge files and an unstable connection to the repository:
-
-Split the huge file(s) into parts of manageable size (e.g. 100MB) and create
-a temporary archive of them. Borg will create checkpoints now more frequently
-than if you try to backup the files in their original form (e.g. 100GB).
-
-After that, you can remove the parts again and backup the huge file(s) in
-their original form. This will now work a lot faster as a lot of content chunks
-are already in the repository.
-
-After you have successfully backed up the huge original file(s), you can remove
-the temporary archive you made from the parts.
-
-We realize that this is just a better-than-nothing workaround, see :issue:`1198`
-for a potential solution.
-
-Please note that this workaround only helps you for backup, not for restore.
+This is not a problem any more, see the previous FAQ item.
+
+But please note that this only helps you for backup, not for restore.
 
 If it crashes with a UnicodeError, what can I do?
 -------------------------------------------------
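
To make the naming scheme from the new FAQ text concrete, here is a tiny, hypothetical Python illustration; the helper function and the example paths are made up, only the ``<filename>.checkpoint_<N>`` pattern comes from the FAQ wording above::

    def checkpoint_part_name(filename, n):
        # pattern documented in the FAQ: <filename>.checkpoint_<N>
        return '{}.checkpoint_{}'.format(filename, n)

    # with --consider-checkpoint-files, list/extract would also consider
    # entries named like these:
    print(checkpoint_part_name('home/user/huge.img', 1))   # home/user/huge.img.checkpoint_1
    print(checkpoint_part_name('home/user/huge.img', 23))  # home/user/huge.img.checkpoint_23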


@@ -231,7 +231,8 @@ class Archive:
     def __init__(self, repository, key, manifest, name, cache=None, create=False,
                  checkpoint_interval=300, numeric_owner=False, progress=False,
-                 chunker_params=CHUNKER_PARAMS, start=None, end=None, compression=None, compression_files=None):
+                 chunker_params=CHUNKER_PARAMS, start=None, end=None, compression=None, compression_files=None,
+                 consider_checkpoint_files=False):
         self.cwd = os.getcwd()
         self.key = key
         self.repository = repository
@@ -250,6 +251,7 @@ class Archive:
         if end is None:
             end = datetime.utcnow()
         self.end = end
+        self.consider_checkpoint_files = consider_checkpoint_files
         self.pipeline = DownloadPipeline(self.repository, self.key)
         if create:
             self.items_buffer = CacheChunkBuffer(self.cache, self.key, self.stats)
@@ -328,7 +330,10 @@ Number of files: {0.stats.nfiles}'''.format(
         return 'Archive(%r)' % self.name
 
     def item_filter(self, item, filter=None):
-        return 'checkpoint' not in item and (filter(item) if filter else True)
+        if not self.consider_checkpoint_files and 'checkpoint' in item:
+            # this is a checkpoint (partial) file, we usually don't want to consider it.
+            return False
+        return filter(item) if filter else True
 
     def iter_items(self, filter=None, preload=False):
         for item in self.pipeline.unpack_many(self.metadata[b'items'], preload=preload,
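
To see what this change does in isolation, here is a standalone sketch using toy dict items rather than borg's real item objects, with the archive attribute turned into a parameter; the behavior is inferred from the diff above. Checkpoint items are skipped unless consider_checkpoint_files is set, and any user-supplied filter still applies afterwards::

    def item_filter(item, filter=None, consider_checkpoint_files=False):
        # mirrors the new Archive.item_filter, attribute passed as a parameter
        if not consider_checkpoint_files and 'checkpoint' in item:
            # checkpoint (partial) file: hidden from list/extract/diff by default
            return False
        return filter(item) if filter else True

    items = [
        {'path': 'home/user/huge.img'},
        {'path': 'home/user/huge.img.checkpoint_1', 'checkpoint': 1},
    ]

    # default: the partial file stays invisible
    assert [i['path'] for i in items if item_filter(i)] == ['home/user/huge.img']

    # with --consider-checkpoint-files, both items pass the filter
    assert sum(item_filter(i, consider_checkpoint_files=True) for i in items) == 2

Note that the old one-liner and the new early-return form are equivalent in the default case; the rewrite only adds the toggle.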


@@ -100,7 +100,8 @@ def with_archive(method):
     @functools.wraps(method)
     def wrapper(self, args, repository, key, manifest, **kwargs):
         archive = Archive(repository, key, manifest, args.location.archive,
-                          numeric_owner=getattr(args, 'numeric_owner', False), cache=kwargs.get('cache'))
+                          numeric_owner=getattr(args, 'numeric_owner', False), cache=kwargs.get('cache'),
+                          consider_checkpoint_files=args.consider_checkpoint_files)
         return method(self, args, repository=repository, manifest=manifest, key=key, archive=archive, **kwargs)
     return wrapper
@@ -668,7 +669,8 @@ class Archiver:
             print_output(line)
 
         archive1 = archive
-        archive2 = Archive(repository, key, manifest, args.archive2)
+        archive2 = Archive(repository, key, manifest, args.archive2,
+                           consider_checkpoint_files=args.consider_checkpoint_files)
         can_compare_chunk_ids = archive1.metadata.get(b'chunker_params', False) == archive2.metadata.get(
             b'chunker_params', True) or args.same_chunker_params
@@ -753,7 +755,8 @@ class Archiver:
         with cache_if_remote(repository) as cached_repo:
             if args.location.archive:
-                archive = Archive(repository, key, manifest, args.location.archive)
+                archive = Archive(repository, key, manifest, args.location.archive,
+                                  consider_checkpoint_files=args.consider_checkpoint_files)
             else:
                 archive = None
             operations = FuseOperations(key, repository, manifest, archive, cached_repo)
@@ -779,7 +782,8 @@ class Archiver:
         if args.location.archive:
             matcher, _ = self.build_matcher(args.excludes, args.paths)
             with Cache(repository, key, manifest, lock_wait=self.lock_wait) as cache:
-                archive = Archive(repository, key, manifest, args.location.archive, cache=cache)
+                archive = Archive(repository, key, manifest, args.location.archive, cache=cache,
+                                  consider_checkpoint_files=args.consider_checkpoint_files)
                 if args.format:
                     format = args.format
@@ -981,7 +985,8 @@ class Archiver:
     @with_repository()
     def do_debug_dump_archive_items(self, args, repository, manifest, key):
         """dump (decrypted, decompressed) archive items metadata (not: data)"""
-        archive = Archive(repository, key, manifest, args.location.archive)
+        archive = Archive(repository, key, manifest, args.location.archive,
+                          consider_checkpoint_files=args.consider_checkpoint_files)
         for i, item_id in enumerate(archive.metadata[b'items']):
             _, data = key.decrypt(item_id, repository.get(item_id))
             filename = '%06d_%s.items' % (i, bin_to_hex(item_id))
@@ -1232,6 +1237,9 @@ class Archiver:
                                   help='set umask to M (local and remote, default: %(default)04o)')
         common_group.add_argument('--remote-path', dest='remote_path', metavar='PATH',
                                   help='set remote path to executable (default: "borg")')
+        common_group.add_argument('--consider-checkpoint-files', dest='consider_checkpoint_files',
+                                  action='store_true', default=False,
+                                  help='treat checkpoint files like normal files (e.g. to list/extract them)')
 
         parser = argparse.ArgumentParser(prog=prog, description='Borg - Deduplicated Backups')
         parser.add_argument('-V', '--version', action='version', version='%(prog)s ' + __version__,
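
As a rough, self-contained model of how an option added to ``common_group`` reaches every subcommand: argparse parent parsers (``parents=``) copy the shared options into each subparser, so ``args.consider_checkpoint_files`` exists no matter which command ran. This is a simplified sketch; the names follow borg's conventions but it is not the real setup code::

    import argparse

    # a parent parser holding options shared by all subcommands
    common_parser = argparse.ArgumentParser(add_help=False)
    common_group = common_parser.add_argument_group('Common options')
    common_group.add_argument('--consider-checkpoint-files', dest='consider_checkpoint_files',
                              action='store_true', default=False,
                              help='treat checkpoint files like normal files (e.g. to list/extract them)')

    parser = argparse.ArgumentParser(prog='borg', description='Borg - Deduplicated Backups')
    subparsers = parser.add_subparsers(dest='command')
    for cmd in ('list', 'extract', 'diff', 'mount'):
        subparsers.add_parser(cmd, parents=[common_parser])

    # every subcommand now accepts the flag, defaulting to False
    args = parser.parse_args(['extract', '--consider-checkpoint-files'])
    assert args.consider_checkpoint_files is True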