borg/src/borg/archiver/create_cmd.py

import errno
import sys
import argparse
import logging
import os
import stat
import subprocess
import time
from io import TextIOWrapper
from ._common import with_repository, Highlander
from .. import helpers
from ..archive import Archive, is_special
from ..archive import BackupError, BackupOSError, backup_io, OsOpen, stat_update_check
from ..archive import FilesystemObjectProcessors, MetadataCollector, ChunksProcessor
from ..cache import Cache
from ..constants import * # NOQA
from ..compress import CompressionSpec
from ..helpers import comment_validator, ChunkerParams, PathSpec
from ..helpers import archivename_validator, FilesCacheMode
from ..helpers import eval_escapes
from ..helpers import timestamp, archive_ts_now
from ..helpers import get_cache_dir, os_stat, get_strip_prefix
from ..helpers import dir_is_tagged
from ..helpers import log_multi
from ..helpers import basic_json_data, json_print
from ..helpers import flags_dir, flags_special_follow, flags_special
from ..helpers import prepare_subprocess_env
from ..helpers import sig_int, ignore_sigint
from ..helpers import iter_separated
from ..helpers import MakePathSafeAction
from ..helpers import Error, CommandError, BackupWarning, FileChangedWarning
from ..manifest import Manifest
from ..patterns import PatternMatcher
from ..platform import is_win32
from ..platform import get_flags
from ..logger import create_logger

logger = create_logger()


class CreateMixIn:
@with_repository(exclusive=True, compatibility=(Manifest.Operation.WRITE,))
def do_create(self, args, repository, manifest):
"""Create new archive"""
key = manifest.key
matcher = PatternMatcher(fallback=True)
matcher.add_inclexcl(args.patterns)

        def create_inner(archive, cache, fso):
# Add cache dir to inode_skip list
skip_inodes = set()
try:
st = os.stat(get_cache_dir())
skip_inodes.add((st.st_ino, st.st_dev))
except OSError:
pass
# Add local repository dir to inode_skip list
if not args.location.host:
try:
st = os.stat(args.location.path)
skip_inodes.add((st.st_ino, st.st_dev))
except OSError:
pass
logger.debug("Processing files ...")
if args.content_from_command:
path = args.stdin_name
mode = args.stdin_mode
user = args.stdin_user
group = args.stdin_group
if not dry_run:
try:
try:
env = prepare_subprocess_env(system=True)
proc = subprocess.Popen(
args.paths,
stdout=subprocess.PIPE,
env=env,
preexec_fn=None if is_win32 else ignore_sigint,
)
except (FileNotFoundError, PermissionError) as e:
raise CommandError(f"Failed to execute command: {e}")
status = fso.process_pipe(
path=path, cache=cache, fd=proc.stdout, mode=mode, user=user, group=group
)
rc = proc.wait()
if rc != 0:
raise CommandError(f"Command {args.paths[0]!r} exited with status {rc}")
except BackupError as e:
raise Error(f"{path!r}: {e}")
else:
status = "+" # included
self.print_file_status(status, path)
elif args.paths_from_command or args.paths_from_stdin:
paths_sep = eval_escapes(args.paths_delimiter) if args.paths_delimiter is not None else "\n"
if args.paths_from_command:
try:
env = prepare_subprocess_env(system=True)
proc = subprocess.Popen(
args.paths, stdout=subprocess.PIPE, env=env, preexec_fn=None if is_win32 else ignore_sigint
)
except (FileNotFoundError, PermissionError) as e:
raise CommandError(f"Failed to execute command: {e}")
pipe_bin = proc.stdout
else: # args.paths_from_stdin == True
pipe_bin = sys.stdin.buffer
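                # wrap the binary stream as text; errors="surrogateescape" (PEP 383)
                # round-trips path bytes that are not valid UTF-8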
pipe = TextIOWrapper(pipe_bin, errors="surrogateescape")
for path in iter_separated(pipe, paths_sep):
strip_prefix = get_strip_prefix(path)
path = os.path.normpath(path)
try:
with backup_io("stat"):
st = os_stat(path=path, parent_fd=None, name=None, follow_symlinks=False)
status = self._process_any(
path=path,
parent_fd=None,
name=None,
st=st,
fso=fso,
cache=cache,
read_special=args.read_special,
dry_run=dry_run,
strip_prefix=strip_prefix,
)
except BackupError as e:
self.print_warning_instance(BackupWarning(path, e))
status = "E"
if status == "C":
self.print_warning_instance(FileChangedWarning(path))
self.print_file_status(status, path)
if not dry_run and status is not None:
fso.stats.files_stats[status] += 1
if args.paths_from_command:
rc = proc.wait()
if rc != 0:
raise CommandError(f"Command {args.paths[0]!r} exited with status {rc}")
else:
for path in args.paths:
if path == "": # issue #5637
self.print_warning("An empty string was given as PATH, ignoring.")
continue
if path == "-": # stdin
path = args.stdin_name
mode = args.stdin_mode
user = args.stdin_user
group = args.stdin_group
if not dry_run:
try:
status = fso.process_pipe(
path=path, cache=cache, fd=sys.stdin.buffer, mode=mode, user=user, group=group
)
except BackupError as e:
self.print_warning_instance(BackupWarning(path, e))
status = "E"
else:
status = "+" # included
self.print_file_status(status, path)
if not dry_run and status is not None:
fso.stats.files_stats[status] += 1
continue
strip_prefix = get_strip_prefix(path)
path = os.path.normpath(path)
try:
with backup_io("stat"):
st = os_stat(path=path, parent_fd=None, name=None, follow_symlinks=False)
restrict_dev = st.st_dev if args.one_file_system else None
self._rec_walk(
path=path,
parent_fd=None,
name=None,
fso=fso,
cache=cache,
matcher=matcher,
exclude_caches=args.exclude_caches,
exclude_if_present=args.exclude_if_present,
keep_exclude_tags=args.keep_exclude_tags,
skip_inodes=skip_inodes,
restrict_dev=restrict_dev,
read_special=args.read_special,
dry_run=dry_run,
strip_prefix=strip_prefix,
)
                        # if we get back here, we've finished recursing into <path>;
                        # we never want to get back in there (even if path is given twice as recursion root)
skip_inodes.add((st.st_ino, st.st_dev))
except BackupError as e:
# this comes from os.stat, self._rec_walk has own exception handler
self.print_warning_instance(BackupWarning(path, e))
continue
if not dry_run:
if args.progress:
archive.stats.show_progress(final=True)
archive.stats += fso.stats
archive.stats.rx_bytes = getattr(repository, "rx_bytes", 0)
archive.stats.tx_bytes = getattr(repository, "tx_bytes", 0)
if sig_int:
# do not save the archive if the user ctrl-c-ed - it is valid, but incomplete.
# we already have a checkpoint archive in this case.
raise Error("Got Ctrl-C / SIGINT.")
else:
archive.save(comment=args.comment, timestamp=args.timestamp)
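                    # --json implies --stats: the JSON output includes the archive statistics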
args.stats |= args.json
if args.stats:
if args.json:
json_print(basic_json_data(manifest, cache=cache, extra={"archive": archive}))
else:
log_multi(str(archive), str(archive.stats), logger=logging.getLogger("borg.output.stats"))

        self.output_filter = args.output_filter
self.output_list = args.output_list
self.noflags = args.noflags
self.noacls = args.noacls
self.noxattrs = args.noxattrs
self.exclude_nodump = args.exclude_nodump
dry_run = args.dry_run
t0 = archive_ts_now()
t0_monotonic = time.monotonic()
logger.info('Creating archive at "%s"' % args.location.processed)
if not dry_run:
with Cache(
repository,
manifest,
progress=args.progress,
lock_wait=self.lock_wait,
permit_adhoc_cache=args.no_cache_sync,
force_adhoc_cache=args.no_cache_sync_forced,
cache_mode=args.files_cache_mode,
iec=args.iec,
) as cache:
archive = Archive(
manifest,
args.name,
cache=cache,
create=True,
numeric_ids=args.numeric_ids,
noatime=not args.atime,
noctime=args.noctime,
progress=args.progress,
chunker_params=args.chunker_params,
start=t0,
start_monotonic=t0_monotonic,
log_json=args.log_json,
iec=args.iec,
)
metadata_collector = MetadataCollector(
noatime=not args.atime,
noctime=args.noctime,
noflags=args.noflags,
noacls=args.noacls,
noxattrs=args.noxattrs,
numeric_ids=args.numeric_ids,
nobirthtime=args.nobirthtime,
)
cp = ChunksProcessor(
cache=cache,
key=key,
add_item=archive.add_item,
prepare_checkpoint=archive.prepare_checkpoint,
write_checkpoint=archive.write_checkpoint,
checkpoint_interval=args.checkpoint_interval,
checkpoint_volume=args.checkpoint_volume,
rechunkify=False,
)
fso = FilesystemObjectProcessors(
metadata_collector=metadata_collector,
cache=cache,
key=key,
process_file_chunks=cp.process_file_chunks,
add_item=archive.add_item,
chunker_params=args.chunker_params,
show_progress=args.progress,
sparse=args.sparse,
log_json=args.log_json,
iec=args.iec,
file_status_printer=self.print_file_status,
)
create_inner(archive, cache, fso)
else:
create_inner(None, None, None)

    def _process_any(self, *, path, parent_fd, name, st, fso, cache, read_special, dry_run, strip_prefix):
"""
Call the right method on the given FilesystemObjectProcessor.
"""
if dry_run:
return "+" # included
MAX_RETRIES = 10 # count includes the initial try (initial try == "retry 0")
for retry in range(MAX_RETRIES):
last_try = retry == MAX_RETRIES - 1
try:
if stat.S_ISREG(st.st_mode):
return fso.process_file(
path=path,
parent_fd=parent_fd,
name=name,
st=st,
cache=cache,
last_try=last_try,
strip_prefix=strip_prefix,
)
elif stat.S_ISDIR(st.st_mode):
return fso.process_dir(path=path, parent_fd=parent_fd, name=name, st=st, strip_prefix=strip_prefix)
elif stat.S_ISLNK(st.st_mode):
if not read_special:
return fso.process_symlink(
path=path, parent_fd=parent_fd, name=name, st=st, strip_prefix=strip_prefix
)
else:
try:
st_target = os_stat(path=path, parent_fd=parent_fd, name=name, follow_symlinks=True)
except OSError:
special = False
else:
special = is_special(st_target.st_mode)
if special:
return fso.process_file(
path=path,
parent_fd=parent_fd,
name=name,
st=st_target,
cache=cache,
flags=flags_special_follow,
last_try=last_try,
strip_prefix=strip_prefix,
)
else:
return fso.process_symlink(
path=path, parent_fd=parent_fd, name=name, st=st, strip_prefix=strip_prefix
)
elif stat.S_ISFIFO(st.st_mode):
if not read_special:
return fso.process_fifo(
path=path, parent_fd=parent_fd, name=name, st=st, strip_prefix=strip_prefix
)
else:
return fso.process_file(
path=path,
parent_fd=parent_fd,
name=name,
st=st,
cache=cache,
flags=flags_special,
last_try=last_try,
strip_prefix=strip_prefix,
)
elif stat.S_ISCHR(st.st_mode):
if not read_special:
return fso.process_dev(
path=path, parent_fd=parent_fd, name=name, st=st, dev_type="c", strip_prefix=strip_prefix
)
else:
return fso.process_file(
path=path,
parent_fd=parent_fd,
name=name,
st=st,
cache=cache,
flags=flags_special,
last_try=last_try,
strip_prefix=strip_prefix,
)
elif stat.S_ISBLK(st.st_mode):
if not read_special:
return fso.process_dev(
path=path, parent_fd=parent_fd, name=name, st=st, dev_type="b", strip_prefix=strip_prefix
)
else:
return fso.process_file(
path=path,
parent_fd=parent_fd,
name=name,
st=st,
cache=cache,
flags=flags_special,
last_try=last_try,
strip_prefix=strip_prefix,
)
elif stat.S_ISSOCK(st.st_mode):
# Ignore unix sockets
return
elif stat.S_ISDOOR(st.st_mode):
# Ignore Solaris doors
return
elif stat.S_ISPORT(st.st_mode):
# Ignore Solaris event ports
return
else:
self.print_warning("Unknown file type: %s", path)
return
except BackupError as err:
if isinstance(err, BackupOSError):
if err.errno in (errno.EPERM, errno.EACCES):
# Do not try again, such errors can not be fixed by retrying.
raise
# sleep a bit, so temporary problems might go away...
sleep_s = 1000.0 / 1e6 * 10 ** (retry / 2) # retry 0: 1ms, retry 6: 1s, ...
time.sleep(sleep_s)
if retry < MAX_RETRIES - 1:
logger.warning(
f"{path}: {err}, slept {sleep_s:.3f}s, next: retry: {retry + 1} of {MAX_RETRIES - 1}..."
)
else:
# giving up with retries, error will be dealt with (logged) by upper error handler
raise
# we better do a fresh stat on the file, just to make sure to get the current file
# mode right (which could have changed due to a race condition and is important for
# dispatching) and also to get current inode number of that file.
with backup_io("stat"):
st = os_stat(path=path, parent_fd=parent_fd, name=name, follow_symlinks=False)

    def _rec_walk(
self,
*,
path,
parent_fd,
name,
fso,
cache,
matcher,
exclude_caches,
exclude_if_present,
keep_exclude_tags,
skip_inodes,
restrict_dev,
read_special,
dry_run,
strip_prefix,
):
"""
Process *path* (or, preferably, parent_fd/name) recursively according to the various parameters.
This should only raise on critical errors. Per-item errors must be handled within this method.
"""
if sig_int and sig_int.action_done():
# the user says "get out of here!" and we have already completed the desired action.
return
status = None
try:
recurse_excluded_dir = False
if matcher.match(path):
with backup_io("stat"):
st = os_stat(path=path, parent_fd=parent_fd, name=name, follow_symlinks=False)
else:
self.print_file_status("-", path) # excluded
# get out here as quickly as possible:
# we only need to continue if we shall recurse into an excluded directory.
# if we shall not recurse, then do not even touch (stat()) the item, it
# could trigger an error, e.g. if access is forbidden, see #3209.
if not matcher.recurse_dir:
return
recurse_excluded_dir = True
with backup_io("stat"):
st = os_stat(path=path, parent_fd=parent_fd, name=name, follow_symlinks=False)
if not stat.S_ISDIR(st.st_mode):
return
if (st.st_ino, st.st_dev) in skip_inodes:
return
            # if restrict_dev is given, we do not want to recurse into a new filesystem,
            # but we WILL save the mountpoint directory (or, more precisely: the root
            # directory of the mounted filesystem that shadows the mountpoint dir).
recurse = restrict_dev is None or st.st_dev == restrict_dev
if self.exclude_nodump:
# Ignore if nodump flag is set
with backup_io("flags"):
if get_flags(path=path, st=st) & stat.UF_NODUMP:
self.print_file_status("-", path) # excluded
return
if not stat.S_ISDIR(st.st_mode):
# directories cannot go in this branch because they can be excluded based on tag
# files they might contain
status = self._process_any(
path=path,
parent_fd=parent_fd,
name=name,
st=st,
fso=fso,
cache=cache,
read_special=read_special,
dry_run=dry_run,
strip_prefix=strip_prefix,
)
else:
with OsOpen(
path=path, parent_fd=parent_fd, name=name, flags=flags_dir, noatime=True, op="dir_open"
) as child_fd:
                    # child_fd is None for directories on Windows; in that case, a race condition check is not possible.
if child_fd is not None:
with backup_io("fstat"):
st = stat_update_check(st, os.fstat(child_fd))
if recurse:
tag_names = dir_is_tagged(path, exclude_caches, exclude_if_present)
if tag_names:
# if we are already recursing in an excluded dir, we do not need to do anything else than
# returning (we do not need to archive or recurse into tagged directories), see #3991:
if not recurse_excluded_dir:
if keep_exclude_tags:
if not dry_run:
fso.process_dir_with_fd(
path=path, fd=child_fd, st=st, strip_prefix=strip_prefix
)
for tag_name in tag_names:
tag_path = os.path.join(path, tag_name)
self._rec_walk(
path=tag_path,
parent_fd=child_fd,
name=tag_name,
fso=fso,
cache=cache,
matcher=matcher,
exclude_caches=exclude_caches,
exclude_if_present=exclude_if_present,
keep_exclude_tags=keep_exclude_tags,
skip_inodes=skip_inodes,
restrict_dev=restrict_dev,
read_special=read_special,
dry_run=dry_run,
strip_prefix=strip_prefix,
)
self.print_file_status("-", path) # excluded
return
if not recurse_excluded_dir:
if not dry_run:
status = fso.process_dir_with_fd(path=path, fd=child_fd, st=st, strip_prefix=strip_prefix)
else:
status = "+" # included (dir)
if recurse:
with backup_io("scandir"):
entries = helpers.scandir_inorder(path=path, fd=child_fd)
for dirent in entries:
normpath = os.path.normpath(os.path.join(path, dirent.name))
self._rec_walk(
path=normpath,
parent_fd=child_fd,
name=dirent.name,
fso=fso,
cache=cache,
matcher=matcher,
exclude_caches=exclude_caches,
exclude_if_present=exclude_if_present,
keep_exclude_tags=keep_exclude_tags,
skip_inodes=skip_inodes,
restrict_dev=restrict_dev,
read_special=read_special,
dry_run=dry_run,
strip_prefix=strip_prefix,
)
except BackupError as e:
self.print_warning_instance(BackupWarning(path, e))
status = "E"
if status == "C":
self.print_warning_instance(FileChangedWarning(path))
if not recurse_excluded_dir:
self.print_file_status(status, path)
if not dry_run and status is not None:
fso.stats.files_stats[status] += 1

    def build_parser_create(self, subparsers, common_parser, mid_common_parser):
from ._common import process_epilog
from ._common import define_exclusion_group

        create_epilog = process_epilog(
"""
        This command creates a backup archive containing all files found while recursively
        traversing all paths specified. Paths are added to the archive as they are given;
        that means if relative paths are desired, the command has to be run from the correct
        directory.

        The slashdot hack in paths (recursion roots) is triggered by using ``/./``:
        ``/this/gets/stripped/./this/gets/archived`` means to process that fs object, but
        strip the prefix on the left side of ``./`` from the archived items (in this case,
        ``this/gets/archived`` will be the path in the archived item).
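
        For example, a create command using the slashdot hack might look like this
        (hypothetical paths, shown only to illustrate the prefix stripping)::

            borg create REPO::ARCHIVE /home/user/./Documents

        This recurses into ``/home/user/Documents``, but stores the items with paths
        starting at ``Documents`` (the ``/home/user`` prefix is stripped).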

        When giving '-' as path, borg will read data from standard input and create a
        file 'stdin' in the created archive from that data. In some cases it's more
        appropriate to use --content-from-command, however. See section *Reading from
        stdin* below for details.

        The archive will consume almost no disk space for files or parts of files that
        have already been stored in other archives.

        The archive name needs to be unique. It must not end in '.checkpoint' or
        '.checkpoint.N' (with N being a number), because these names are used for
        checkpoints and treated in special ways.

        In the archive name, you may use the following placeholders:
        {now}, {utcnow}, {fqdn}, {hostname}, {user} and some others.
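
        For example, one way of using placeholders is to name archives after the host
        and the current date (see :ref:`borg_placeholders` for the full list)::

            borg create REPO::'{hostname}-{now:%Y-%m-%d}' /home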

        Backup speed is increased by not reprocessing files that are already part of
        existing archives and weren't modified. The detection of unmodified files is
        done by comparing multiple file metadata values with previous values kept in
        the files cache.

        This comparison can operate in different modes as given by ``--files-cache``:

        - ctime,size,inode (default)
        - mtime,size,inode (default behaviour of borg versions older than 1.1.0rc4)
        - ctime,size (ignore the inode number)
        - mtime,size (ignore the inode number)
        - rechunk,ctime (all files are considered modified - rechunk, cache ctime)
        - rechunk,mtime (all files are considered modified - rechunk, cache mtime)
        - disabled (disable the files cache, all files considered modified - rechunk)

        inode number: better safety, but often unstable on network filesystems

        Normally, detecting file modifications will take inode information into
        consideration to improve the reliability of file change detection.
        This is problematic for files located on sshfs and similar network file
        systems which do not provide stable inode numbers; such files will always
        be considered modified. You can use modes without `inode` in this case to
        improve performance, but reliability of change detection might be reduced.
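
        For example, when the backup source is on sshfs or a similar network filesystem
        (an illustrative mount point is assumed here), a mode without the inode number
        avoids needless rechunking::

            borg create --files-cache=ctime,size REPO::ARCHIVE /mnt/sshfs/data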

        ctime vs. mtime: safety vs. speed

        - ctime is a rather safe way to detect changes to a file (metadata and contents)
          as it can not be set from userspace. But a metadata-only change (e.g. a chown
          or chmod) will already update the ctime, so there might be some unnecessary
          chunking/hashing even without content changes. Also, some filesystems do not
          support ctime (change time).
        - mtime usually works and only updates if file contents were changed. But mtime
          can be arbitrarily set from userspace, e.g. to set mtime back to the same value
          it had before a content change happened. This can be used maliciously as well as
          well-meant, but in both cases mtime based cache modes can be problematic.

        The mount points of filesystems or filesystem snapshots should be the same for every
        creation of a new archive to ensure fast operation. This is because the files cache,
        which is used to quickly detect changed files, uses absolute filenames.
        If this is not possible, consider creating a bind mount to a stable location.
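
        For example, if snapshots get mounted under changing names, a bind mount can
        present them under one stable path (illustrative paths)::

            mount --bind /snapshots/home-2024-06-01 /mnt/backup-src
            borg create REPO::ARCHIVE /mnt/backup-src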

        The ``--progress`` option shows (from left to right) Original and (uncompressed)
        deduplicated size (O and U respectively), then the Number of files (N) processed so far,
        followed by the currently processed path.

        When using ``--stats``, you will get some statistics about how much data was
        added - the "This Archive" deduplicated size there is most interesting as that is
        how much your repository will grow. Please note that the "All archives" stats refer to
        the state after creation. Also, the ``--stats`` and ``--dry-run`` options are mutually
        exclusive because the data is not actually compressed and deduplicated during a dry run.

        For more help on include/exclude patterns, see the :ref:`borg_patterns` command output.

        For more help on placeholders, see the :ref:`borg_placeholders` command output.

        .. man NOTES

        The ``--exclude`` patterns are not like tar. In tar ``--exclude`` .bundler/gems will
        exclude foo/.bundler/gems. In borg it will not; you need to use ``--exclude``
        '\\*/.bundler/gems' to get the same effect.

        In addition to using ``--exclude`` patterns, it is possible to use
        ``--exclude-if-present`` to specify the name of a filesystem object (e.g. a file
        or folder name) which, when contained within another folder, will prevent the
        containing folder from being backed up. By default, the containing folder and
        all of its contents will be omitted from the backup. If, however, you wish to
        only include the objects specified by ``--exclude-if-present`` in your backup,
        and not include any other contents of the containing folder, this can be enabled
        through using the ``--keep-exclude-tags`` option.
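
        For example, to skip every directory that contains a ``.nobackup`` tag file,
        while still storing the tag files themselves (an illustrative tag name)::

            borg create --exclude-if-present .nobackup --keep-exclude-tags REPO::ARCHIVE /home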

        The ``-x`` or ``--one-file-system`` option excludes directories that are mountpoints (and everything in them).
        It detects mountpoints by comparing the device number from the output of ``stat()`` of the directory and its
        parent directory. Specifically, it excludes directories for which ``stat()`` reports a device number different
        from the device number of their parent.
        In general: be aware that there are directories whose device number differs from their parent's although the
        kernel does not consider them mountpoints, and also the other way around.
        Linux examples for this are bind mounts (possibly same device number, but always a mountpoint) and ALL
        subvolumes of a btrfs (different device number from parent but not necessarily a mountpoint).
        macOS examples are the apfs mounts of a typical macOS installation.
        Therefore, when using ``--one-file-system``, you should double-check that the backup works as intended.
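
        For example, to archive the root filesystem without descending into other
        mounted filesystems such as /proc, /sys or network mounts::

            borg create --one-file-system REPO::ARCHIVE /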

        .. _list_item_flags:

        Item flags
        ++++++++++

        ``--list`` outputs a list of all files, directories and other
        file system items it considered (no matter whether they had content changes
        or not). For each item, it prefixes a single-letter flag that indicates type
        and/or status of the item.

        If you are interested only in a subset of that output, you can give e.g.
        ``--filter=AME`` and it will only show regular files with A, M or E status (see
        below).

        An uppercase character represents the status of a regular file relative to the
        "files" cache (not relative to the repo -- this is an issue if the files cache
        is not used). Metadata is stored in any case and for 'A' and 'M' also new data
        chunks are stored. For 'U' all data chunks refer to already existing chunks.

        - 'A' = regular file, added (see also :ref:`a_status_oddity` in the FAQ)
        - 'M' = regular file, modified
        - 'U' = regular file, unchanged
        - 'C' = regular file, it changed while we backed it up
        - 'E' = regular file, an error happened while accessing/reading *this* file

        A lowercase character means a file type other than a regular file;
        borg usually just stores their metadata:

        - 'd' = directory
        - 'b' = block device
        - 'c' = char device
        - 'h' = regular file, hardlink (to already seen inodes)
        - 's' = symlink
        - 'f' = fifo

        Other flags used include:

        - '+' = included, item would be backed up (if not in dry-run mode)
        - '-' = excluded, item would not be / was not backed up
        - 'i' = backup data was read from standard input (stdin)
        - '?' = missing status code (if you see this, please file a bug report!)

        Reading backup data from stdin
        ++++++++++++++++++++++++++++++

        There are two methods to read from stdin. Either specify ``-`` as path and
        pipe directly to borg::

            backup-vm --id myvm --stdout | borg create REPO::ARCHIVE -

        Or use ``--content-from-command`` to have Borg manage the execution of the
        command and piping. If you do so, the first PATH argument is interpreted
        as command to execute and any further arguments are treated as arguments
        to the command::

            borg create --content-from-command REPO::ARCHIVE -- backup-vm --id myvm --stdout

        ``--`` is used to ensure ``--id`` and ``--stdout`` are **not** considered
        arguments to ``borg`` but rather ``backup-vm``.

        The difference between the two approaches is that piping to borg creates an
        archive even if the command piping to borg exits with a failure. In this case,
        **one can end up with truncated output being backed up**. Using
        ``--content-from-command``, in contrast, borg is guaranteed to fail without
        creating an archive should the command fail. The command is considered failed
        when it returned a non-zero exit code.

        Reading from stdin yields just a stream of data without file metadata
        associated with it, and the files cache is not needed at all. So it is
        safe to disable it via ``--files-cache disabled`` and speed up backup
        creation a bit.

        By default, the content read from stdin is stored in a file called 'stdin'.
        Use ``--stdin-name`` to change the name.
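
        For example, to store a database dump under a more telling name (the dump
        command is illustrative)::

            mysqldump mydb | borg create --stdin-name mydb.sql --files-cache=disabled REPO::ARCHIVE -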

        Feeding all file paths from an external source
        ++++++++++++++++++++++++++++++++++++++++++++++

        Usually, you give a starting path (recursion root) to borg and then borg
        automatically recurses, finds and backs up all fs objects contained in
        there (optionally considering include/exclude rules).

        If you need more control and you want to give every single fs object path
        to borg (maybe implementing your own recursion or your own rules), you can use
        ``--paths-from-stdin`` or ``--paths-from-command`` (with the latter, borg will
        fail to create an archive should the command fail).

        Borg supports paths with the slashdot hack to strip path prefixes here also.
        So, be careful not to unintentionally trigger that.
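
        For example, paths could be produced by ``find`` and fed NUL-separated into
        borg (an illustrative selection; a ``\\0`` delimiter avoids problems with
        newlines in file names)::

            find /home -name '*.conf' -print0 | borg create --paths-from-stdin --paths-delimiter '\\0' REPO::ARCHIVE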
"""
)
subparser = subparsers.add_parser(
"create",
parents=[common_parser],
add_help=False,
description=self.do_create.__doc__,
epilog=create_epilog,
formatter_class=argparse.RawDescriptionHelpFormatter,
help="create backup",
)
subparser.set_defaults(func=self.do_create)
# note: --dry-run and --stats are mutually exclusive, but we do not want to abort when
# parsing, but rather proceed with the dry-run, but without stats (see run() method).
subparser.add_argument(
"-n", "--dry-run", dest="dry_run", action="store_true", help="do not create a backup archive"
)
subparser.add_argument(
"-s", "--stats", dest="stats", action="store_true", help="print statistics for the created archive"
)
subparser.add_argument(
"--list", dest="output_list", action="store_true", help="output verbose list of items (files, dirs, ...)"
)
subparser.add_argument(
"--filter",
metavar="STATUSCHARS",
dest="output_filter",
action=Highlander,
help="only display items with the given status characters (see description)",
)
subparser.add_argument("--json", action="store_true", help="output stats as JSON. Implies ``--stats``.")
subparser.add_argument(
"--no-cache-sync",
dest="no_cache_sync",
action="store_true",
help="experimental: do not synchronize the cache. Implies not using the files cache.",
)
subparser.add_argument(
"--no-cache-sync-forced",
dest="no_cache_sync_forced",
action="store_true",
help="experimental: do not synchronize the cache (forced). Implies not using the files cache.",
)
subparser.add_argument(
"--stdin-name",
metavar="NAME",
dest="stdin_name",
default="stdin",
action=MakePathSafeAction,
help="use NAME in archive for stdin data (default: %(default)r)",
)
subparser.add_argument(
"--stdin-user",
metavar="USER",
dest="stdin_user",
default=None,
action=Highlander,
help="set user USER in archive for stdin data (default: do not store user/uid)",
)
subparser.add_argument(
"--stdin-group",
metavar="GROUP",
dest="stdin_group",
default=None,
action=Highlander,
help="set group GROUP in archive for stdin data (default: do not store group/gid)",
)
subparser.add_argument(
"--stdin-mode",
metavar="M",
dest="stdin_mode",
type=lambda s: int(s, 8),
default=STDIN_MODE_DEFAULT,
action=Highlander,
help="set mode to M in archive for stdin data (default: %(default)04o)",
)
subparser.add_argument(
"--content-from-command",
action="store_true",
help="interpret PATH as command and store its stdout. See also section Reading from" " stdin below.",
)
subparser.add_argument(
"--paths-from-stdin",
action="store_true",
help="read DELIM-separated list of paths to back up from stdin. All control is external: it will back"
" up all files given - no more, no less.",
)
subparser.add_argument(
"--paths-from-command",
action="store_true",
help="interpret PATH as command and treat its output as ``--paths-from-stdin``",
)
subparser.add_argument(
"--paths-delimiter",
action=Highlander,
metavar="DELIM",
help="set path delimiter for ``--paths-from-stdin`` and ``--paths-from-command`` (default: ``\\n``) ",
)
exclude_group = define_exclusion_group(subparser, tag_files=True)
exclude_group.add_argument(
"--exclude-nodump", dest="exclude_nodump", action="store_true", help="exclude files flagged NODUMP"
)
fs_group = subparser.add_argument_group("Filesystem options")
fs_group.add_argument(
"-x",
"--one-file-system",
dest="one_file_system",
action="store_true",
help="stay in the same file system and do not store mount points of other file systems - "
"this might behave different from your expectations, see the description below.",
)
fs_group.add_argument(
"--numeric-ids",
dest="numeric_ids",
action="store_true",
help="only store numeric user and group identifiers",
)
fs_group.add_argument("--atime", dest="atime", action="store_true", help="do store atime into archive")
fs_group.add_argument("--noctime", dest="noctime", action="store_true", help="do not store ctime into archive")
fs_group.add_argument(
"--nobirthtime",
dest="nobirthtime",
action="store_true",
help="do not store birthtime (creation date) into archive",
)
fs_group.add_argument(
"--noflags",
dest="noflags",
action="store_true",
help="do not read and store flags (e.g. NODUMP, IMMUTABLE) into archive",
)
fs_group.add_argument(
"--noacls", dest="noacls", action="store_true", help="do not read and store ACLs into archive"
)
fs_group.add_argument(
"--noxattrs", dest="noxattrs", action="store_true", help="do not read and store xattrs into archive"
)
fs_group.add_argument(
"--sparse",
dest="sparse",
action="store_true",
help="detect sparse holes in input (supported only by fixed chunker)",
)
fs_group.add_argument(
"--files-cache",
metavar="MODE",
dest="files_cache_mode",
action=Highlander,
type=FilesCacheMode,
default=FILES_CACHE_MODE_UI_DEFAULT,
help="operate files cache in MODE. default: %s" % FILES_CACHE_MODE_UI_DEFAULT,
)
fs_group.add_argument(
"--read-special",
dest="read_special",
action="store_true",
help="open and read block and char device files as well as FIFOs as if they were "
"regular files. Also follows symlinks pointing to these kinds of files.",
)
archive_group = subparser.add_argument_group("Archive options")
archive_group.add_argument(
"--comment",
metavar="COMMENT",
dest="comment",
type=comment_validator,
default="",
action=Highlander,
help="add a comment text to the archive",
)
archive_group.add_argument(
"--timestamp",
metavar="TIMESTAMP",
dest="timestamp",
type=timestamp,
default=None,
action=Highlander,
help="manually specify the archive creation date/time (yyyy-mm-ddThh:mm:ss[(+|-)HH:MM] format, "
"(+|-)HH:MM is the UTC offset, default: local time zone). Alternatively, give a reference file/directory.",
)
archive_group.add_argument(
"-c",
"--checkpoint-interval",
metavar="SECONDS",
dest="checkpoint_interval",
type=int,
default=1800,
action=Highlander,
help="write checkpoint every SECONDS seconds (Default: 1800)",
)
archive_group.add_argument(
"--checkpoint-volume",
metavar="BYTES",
dest="checkpoint_volume",
type=int,
default=0,
action=Highlander,
help="write checkpoint every BYTES bytes (Default: 0, meaning no volume based checkpointing)",
)
archive_group.add_argument(
"--chunker-params",
metavar="PARAMS",
dest="chunker_params",
type=ChunkerParams,
default=CHUNKER_PARAMS,
action=Highlander,
help="specify the chunker parameters (ALGO, CHUNK_MIN_EXP, CHUNK_MAX_EXP, "
"HASH_MASK_BITS, HASH_WINDOW_SIZE). default: %s,%d,%d,%d,%d" % CHUNKER_PARAMS,
)
archive_group.add_argument(
"-C",
"--compression",
metavar="COMPRESSION",
dest="compression",
type=CompressionSpec,
default=CompressionSpec("lz4"),
action=Highlander,
help="select compression algorithm, see the output of the " '"borg help compression" command for details.',
)
subparser.add_argument("name", metavar="NAME", type=archivename_validator, help="specify the archive name")
subparser.add_argument(
"paths", metavar="PATH", nargs="*", type=PathSpec, action="extend", help="paths to archive"
)