borg/src/borg/archiver/create_cmd.py

import errno
import sys
import argparse
import logging
import os
import stat
import subprocess
import time
from io import TextIOWrapper
from ._common import with_repository, Highlander
from .. import helpers
from ..archive import Archive, is_special
from ..archive import BackupError, BackupOSError, backup_io, OsOpen, stat_update_check
from ..archive import FilesystemObjectProcessors, MetadataCollector, ChunksProcessor
from ..cache import Cache
from ..constants import * # NOQA
from ..compress import CompressionSpec
from ..helpers import comment_validator, ChunkerParams, PathSpec
from ..helpers import archivename_validator, FilesCacheMode
from ..helpers import eval_escapes
from ..helpers import timestamp, archive_ts_now
from ..helpers import get_cache_dir, os_stat, get_strip_prefix
from ..helpers import dir_is_tagged
from ..helpers import log_multi
from ..helpers import basic_json_data, json_print
from ..helpers import flags_dir, flags_special_follow, flags_special
from ..helpers import prepare_subprocess_env
from ..helpers import sig_int, ignore_sigint
from ..helpers import iter_separated
from ..helpers import MakePathSafeAction
from ..helpers import Error, CommandError, BackupWarning, FileChangedWarning
from ..manifest import Manifest
from ..patterns import PatternMatcher
from ..platform import is_win32
from ..platform import get_flags
from ..logger import create_logger

logger = create_logger()


class CreateMixIn:
@with_repository(exclusive=True, compatibility=(Manifest.Operation.WRITE,))
def do_create(self, args, repository, manifest):
"""Create new archive"""
key = manifest.key
matcher = PatternMatcher(fallback=True)
matcher.add_inclexcl(args.patterns)

        def create_inner(archive, cache, fso):
# Add cache dir to inode_skip list
skip_inodes = set()
try:
st = os.stat(get_cache_dir())
skip_inodes.add((st.st_ino, st.st_dev))
except OSError:
pass
# Add local repository dir to inode_skip list
if not args.location.host:
try:
st = os.stat(args.location.path)
skip_inodes.add((st.st_ino, st.st_dev))
except OSError:
pass
logger.debug("Processing files ...")
if args.content_from_command:
path = args.stdin_name
mode = args.stdin_mode
user = args.stdin_user
group = args.stdin_group
if not dry_run:
try:
try:
env = prepare_subprocess_env(system=True)
proc = subprocess.Popen(
args.paths,
stdout=subprocess.PIPE,
env=env,
preexec_fn=None if is_win32 else ignore_sigint,
)
except (FileNotFoundError, PermissionError) as e:
raise CommandError(f"Failed to execute command: {e}")
status = fso.process_pipe(
path=path, cache=cache, fd=proc.stdout, mode=mode, user=user, group=group
)
rc = proc.wait()
if rc != 0:
raise CommandError(f"Command {args.paths[0]!r} exited with status {rc}")
except BackupError as e:
raise Error(f"{path!r}: {e}")
else:
status = "+" # included
self.print_file_status(status, path)
elif args.paths_from_command or args.paths_from_stdin:
paths_sep = eval_escapes(args.paths_delimiter) if args.paths_delimiter is not None else "\n"
if args.paths_from_command:
try:
env = prepare_subprocess_env(system=True)
proc = subprocess.Popen(
args.paths, stdout=subprocess.PIPE, env=env, preexec_fn=None if is_win32 else ignore_sigint
)
except (FileNotFoundError, PermissionError) as e:
raise CommandError(f"Failed to execute command: {e}")
pipe_bin = proc.stdout
else: # args.paths_from_stdin == True
pipe_bin = sys.stdin.buffer
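                # wrap the binary stream as text; errors="surrogateescape" (PEP 383)
                # round-trips path bytes that are not valid UTF-8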
pipe = TextIOWrapper(pipe_bin, errors="surrogateescape")
for path in iter_separated(pipe, paths_sep):
strip_prefix = get_strip_prefix(path)
path = os.path.normpath(path)
try:
with backup_io("stat"):
st = os_stat(path=path, parent_fd=None, name=None, follow_symlinks=False)
status = self._process_any(
path=path,
parent_fd=None,
name=None,
st=st,
fso=fso,
cache=cache,
read_special=args.read_special,
dry_run=dry_run,
strip_prefix=strip_prefix,
)
except BackupError as e:
self.print_warning_instance(BackupWarning(path, e))
status = "E"
if status == "C":
self.print_warning_instance(FileChangedWarning(path))
self.print_file_status(status, path)
if not dry_run and status is not None:
fso.stats.files_stats[status] += 1
if args.paths_from_command:
rc = proc.wait()
if rc != 0:
raise CommandError(f"Command {args.paths[0]!r} exited with status {rc}")
else:
for path in args.paths:
if path == "": # issue #5637
self.print_warning("An empty string was given as PATH, ignoring.")
continue
if path == "-": # stdin
path = args.stdin_name
mode = args.stdin_mode
user = args.stdin_user
group = args.stdin_group
if not dry_run:
try:
status = fso.process_pipe(
path=path, cache=cache, fd=sys.stdin.buffer, mode=mode, user=user, group=group
)
except BackupError as e:
self.print_warning_instance(BackupWarning(path, e))
status = "E"
else:
status = "+" # included
self.print_file_status(status, path)
if not dry_run and status is not None:
fso.stats.files_stats[status] += 1
continue
strip_prefix = get_strip_prefix(path)
path = os.path.normpath(path)
try:
with backup_io("stat"):
st = os_stat(path=path, parent_fd=None, name=None, follow_symlinks=False)
restrict_dev = st.st_dev if args.one_file_system else None
self._rec_walk(
path=path,
parent_fd=None,
name=None,
fso=fso,
cache=cache,
matcher=matcher,
exclude_caches=args.exclude_caches,
exclude_if_present=args.exclude_if_present,
keep_exclude_tags=args.keep_exclude_tags,
skip_inodes=skip_inodes,
restrict_dev=restrict_dev,
read_special=args.read_special,
dry_run=dry_run,
strip_prefix=strip_prefix,
)
                        # if we get back here, we've finished recursing into <path>;
                        # we never want to get back in there (even if path is given twice as recursion root)
skip_inodes.add((st.st_ino, st.st_dev))
except BackupError as e:
# this comes from os.stat, self._rec_walk has own exception handler
self.print_warning_instance(BackupWarning(path, e))
continue
if not dry_run:
if args.progress:
archive.stats.show_progress(final=True)
archive.stats += fso.stats
archive.stats.rx_bytes = getattr(repository, "rx_bytes", 0)
archive.stats.tx_bytes = getattr(repository, "tx_bytes", 0)
if sig_int:
# do not save the archive if the user ctrl-c-ed - it is valid, but incomplete.
# we already have a checkpoint archive in this case.
raise Error("Got Ctrl-C / SIGINT.")
else:
archive.save(comment=args.comment, timestamp=args.timestamp)
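                    # --json implies --stats: the JSON output includes the archive statistics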
args.stats |= args.json
if args.stats:
if args.json:
json_print(basic_json_data(manifest, cache=cache, extra={"archive": archive}))
else:
log_multi(str(archive), str(archive.stats), logger=logging.getLogger("borg.output.stats"))

        self.output_filter = args.output_filter
self.output_list = args.output_list
self.noflags = args.noflags
self.noacls = args.noacls
self.noxattrs = args.noxattrs
self.exclude_nodump = args.exclude_nodump
dry_run = args.dry_run
t0 = archive_ts_now()
t0_monotonic = time.monotonic()
logger.info('Creating archive at "%s"' % args.location.processed)
if not dry_run:
with Cache(
repository,
manifest,
progress=args.progress,
lock_wait=self.lock_wait,
permit_adhoc_cache=args.no_cache_sync,
force_adhoc_cache=args.no_cache_sync_forced,
cache_mode=args.files_cache_mode,
iec=args.iec,
) as cache:
archive = Archive(
manifest,
args.name,
cache=cache,
create=True,
numeric_ids=args.numeric_ids,
noatime=not args.atime,
noctime=args.noctime,
progress=args.progress,
chunker_params=args.chunker_params,
start=t0,
start_monotonic=t0_monotonic,
log_json=args.log_json,
iec=args.iec,
)
metadata_collector = MetadataCollector(
noatime=not args.atime,
noctime=args.noctime,
noflags=args.noflags,
noacls=args.noacls,
noxattrs=args.noxattrs,
numeric_ids=args.numeric_ids,
nobirthtime=args.nobirthtime,
)
cp = ChunksProcessor(
cache=cache,
key=key,
add_item=archive.add_item,
prepare_checkpoint=archive.prepare_checkpoint,
write_checkpoint=archive.write_checkpoint,
checkpoint_interval=args.checkpoint_interval,
checkpoint_volume=args.checkpoint_volume,
rechunkify=False,
)
fso = FilesystemObjectProcessors(
metadata_collector=metadata_collector,
cache=cache,
key=key,
process_file_chunks=cp.process_file_chunks,
add_item=archive.add_item,
chunker_params=args.chunker_params,
show_progress=args.progress,
sparse=args.sparse,
log_json=args.log_json,
iec=args.iec,
file_status_printer=self.print_file_status,
)
create_inner(archive, cache, fso)
else:
create_inner(None, None, None)

    def _process_any(self, *, path, parent_fd, name, st, fso, cache, read_special, dry_run, strip_prefix):
"""
Call the right method on the given FilesystemObjectProcessor.
"""
if dry_run:
return "+" # included
MAX_RETRIES = 10 # count includes the initial try (initial try == "retry 0")
for retry in range(MAX_RETRIES):
last_try = retry == MAX_RETRIES - 1
try:
if stat.S_ISREG(st.st_mode):
return fso.process_file(
path=path,
parent_fd=parent_fd,
name=name,
st=st,
cache=cache,
last_try=last_try,
strip_prefix=strip_prefix,
)
elif stat.S_ISDIR(st.st_mode):
return fso.process_dir(path=path, parent_fd=parent_fd, name=name, st=st, strip_prefix=strip_prefix)
elif stat.S_ISLNK(st.st_mode):
if not read_special:
return fso.process_symlink(
path=path, parent_fd=parent_fd, name=name, st=st, strip_prefix=strip_prefix
)
else:
try:
st_target = os_stat(path=path, parent_fd=parent_fd, name=name, follow_symlinks=True)
except OSError:
special = False
else:
special = is_special(st_target.st_mode)
if special:
return fso.process_file(
path=path,
parent_fd=parent_fd,
name=name,
st=st_target,
cache=cache,
flags=flags_special_follow,
last_try=last_try,
strip_prefix=strip_prefix,
)
else:
return fso.process_symlink(
path=path, parent_fd=parent_fd, name=name, st=st, strip_prefix=strip_prefix
)
elif stat.S_ISFIFO(st.st_mode):
if not read_special:
return fso.process_fifo(
path=path, parent_fd=parent_fd, name=name, st=st, strip_prefix=strip_prefix
)
else:
return fso.process_file(
path=path,
parent_fd=parent_fd,
name=name,
st=st,
cache=cache,
flags=flags_special,
last_try=last_try,
strip_prefix=strip_prefix,
)
elif stat.S_ISCHR(st.st_mode):
if not read_special:
return fso.process_dev(
path=path, parent_fd=parent_fd, name=name, st=st, dev_type="c", strip_prefix=strip_prefix
)
else:
return fso.process_file(
path=path,
parent_fd=parent_fd,
name=name,
st=st,
cache=cache,
flags=flags_special,
last_try=last_try,
strip_prefix=strip_prefix,
)
elif stat.S_ISBLK(st.st_mode):
if not read_special:
return fso.process_dev(
path=path, parent_fd=parent_fd, name=name, st=st, dev_type="b", strip_prefix=strip_prefix
)
else:
return fso.process_file(
path=path,
parent_fd=parent_fd,
name=name,
st=st,
cache=cache,
flags=flags_special,
last_try=last_try,
strip_prefix=strip_prefix,
)
elif stat.S_ISSOCK(st.st_mode):
# Ignore unix sockets
return
elif stat.S_ISDOOR(st.st_mode):
# Ignore Solaris doors
return
elif stat.S_ISPORT(st.st_mode):
# Ignore Solaris event ports
return
else:
self.print_warning("Unknown file type: %s", path)
return
except BackupError as err:
if isinstance(err, BackupOSError):
if err.errno in (errno.EPERM, errno.EACCES):
# Do not try again, such errors can not be fixed by retrying.
raise
# sleep a bit, so temporary problems might go away...
sleep_s = 1000.0 / 1e6 * 10 ** (retry / 2) # retry 0: 1ms, retry 6: 1s, ...
time.sleep(sleep_s)
if retry < MAX_RETRIES - 1:
logger.warning(
f"{path}: {err}, slept {sleep_s:.3f}s, next: retry: {retry + 1} of {MAX_RETRIES - 1}..."
)
else:
# giving up with retries, error will be dealt with (logged) by upper error handler
raise
# we better do a fresh stat on the file, just to make sure to get the current file
# mode right (which could have changed due to a race condition and is important for
# dispatching) and also to get current inode number of that file.
with backup_io("stat"):
st = os_stat(path=path, parent_fd=parent_fd, name=name, follow_symlinks=False)

    def _rec_walk(
self,
*,
path,
parent_fd,
name,
fso,
cache,
matcher,
exclude_caches,
exclude_if_present,
keep_exclude_tags,
skip_inodes,
restrict_dev,
read_special,
dry_run,
strip_prefix,
):
"""
Process *path* (or, preferably, parent_fd/name) recursively according to the various parameters.
This should only raise on critical errors. Per-item errors must be handled within this method.
"""
if sig_int and sig_int.action_done():
# the user says "get out of here!" and we have already completed the desired action.
return
status = None
try:
recurse_excluded_dir = False
if matcher.match(path):
with backup_io("stat"):
st = os_stat(path=path, parent_fd=parent_fd, name=name, follow_symlinks=False)
else:
self.print_file_status("-", path) # excluded
# get out here as quickly as possible:
# we only need to continue if we shall recurse into an excluded directory.
# if we shall not recurse, then do not even touch (stat()) the item, it
# could trigger an error, e.g. if access is forbidden, see #3209.
if not matcher.recurse_dir:
return
recurse_excluded_dir = True
with backup_io("stat"):
st = os_stat(path=path, parent_fd=parent_fd, name=name, follow_symlinks=False)
if not stat.S_ISDIR(st.st_mode):
return
if (st.st_ino, st.st_dev) in skip_inodes:
return
            # if restrict_dev is given, we do not want to recurse into a new filesystem,
            # but we WILL save the mountpoint directory (or, more precisely: the root
            # directory of the mounted filesystem that shadows the mountpoint dir).
recurse = restrict_dev is None or st.st_dev == restrict_dev
if self.exclude_nodump:
# Ignore if nodump flag is set
with backup_io("flags"):
if get_flags(path=path, st=st) & stat.UF_NODUMP:
self.print_file_status("-", path) # excluded
return
if not stat.S_ISDIR(st.st_mode):
# directories cannot go in this branch because they can be excluded based on tag
# files they might contain
status = self._process_any(
path=path,
parent_fd=parent_fd,
name=name,
st=st,
fso=fso,
cache=cache,
read_special=read_special,
dry_run=dry_run,
strip_prefix=strip_prefix,
)
else:
with OsOpen(
path=path, parent_fd=parent_fd, name=name, flags=flags_dir, noatime=True, op="dir_open"
) as child_fd:
                    # child_fd is None for directories on Windows; in that case, a race condition check is not possible.
if child_fd is not None:
with backup_io("fstat"):
st = stat_update_check(st, os.fstat(child_fd))
if recurse:
tag_names = dir_is_tagged(path, exclude_caches, exclude_if_present)
if tag_names:
# if we are already recursing in an excluded dir, we do not need to do anything else than
# returning (we do not need to archive or recurse into tagged directories), see #3991:
if not recurse_excluded_dir:
if keep_exclude_tags:
if not dry_run:
fso.process_dir_with_fd(
path=path, fd=child_fd, st=st, strip_prefix=strip_prefix
)
for tag_name in tag_names:
tag_path = os.path.join(path, tag_name)
self._rec_walk(
path=tag_path,
parent_fd=child_fd,
name=tag_name,
fso=fso,
cache=cache,
matcher=matcher,
exclude_caches=exclude_caches,
exclude_if_present=exclude_if_present,
keep_exclude_tags=keep_exclude_tags,
skip_inodes=skip_inodes,
restrict_dev=restrict_dev,
read_special=read_special,
dry_run=dry_run,
strip_prefix=strip_prefix,
)
self.print_file_status("-", path) # excluded
return
if not recurse_excluded_dir:
if not dry_run:
status = fso.process_dir_with_fd(path=path, fd=child_fd, st=st, strip_prefix=strip_prefix)
else:
status = "+" # included (dir)
if recurse:
with backup_io("scandir"):
entries = helpers.scandir_inorder(path=path, fd=child_fd)
for dirent in entries:
normpath = os.path.normpath(os.path.join(path, dirent.name))
self._rec_walk(
path=normpath,
parent_fd=child_fd,
name=dirent.name,
fso=fso,
cache=cache,
matcher=matcher,
exclude_caches=exclude_caches,
exclude_if_present=exclude_if_present,
keep_exclude_tags=keep_exclude_tags,
skip_inodes=skip_inodes,
restrict_dev=restrict_dev,
read_special=read_special,
dry_run=dry_run,
strip_prefix=strip_prefix,
)
except BackupError as e:
self.print_warning_instance(BackupWarning(path, e))
status = "E"
if status == "C":
self.print_warning_instance(FileChangedWarning(path))
if not recurse_excluded_dir:
self.print_file_status(status, path)
if not dry_run and status is not None:
fso.stats.files_stats[status] += 1

    def build_parser_create(self, subparsers, common_parser, mid_common_parser):
from ._common import process_epilog
from ._common import define_exclusion_group

        create_epilog = process_epilog(
"""
        This command creates a backup archive containing all files found while recursively
        traversing all paths specified. Paths are added to the archive as they are given;
        that means if relative paths are desired, the command has to be run from the correct
        directory.

        The slashdot hack in paths (recursion roots) is triggered by using ``/./``:
        ``/this/gets/stripped/./this/gets/archived`` means to process that fs object, but
        strip the prefix on the left side of ``./`` from the archived items (in this case,
        ``this/gets/archived`` will be the path in the archived item).
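
        For example, a create command using the slashdot hack might look like this
        (hypothetical paths, shown only to illustrate the prefix stripping)::

            borg create REPO::ARCHIVE /home/user/./Documents

        This recurses into ``/home/user/Documents``, but stores the items with paths
        starting at ``Documents`` (the ``/home/user`` prefix is stripped).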

        When giving '-' as path, borg will read data from standard input and create a
        file 'stdin' in the created archive from that data. In some cases it's more
        appropriate to use --content-from-command, however. See section *Reading from
        stdin* below for details.

        The archive will consume almost no disk space for files or parts of files that
        have already been stored in other archives.

        The archive name needs to be unique. It must not end in '.checkpoint' or
        '.checkpoint.N' (with N being a number), because these names are used for
        checkpoints and treated in special ways.

        In the archive name, you may use the following placeholders:
        {now}, {utcnow}, {fqdn}, {hostname}, {user} and some others.
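
        For example, one way of using placeholders is to name archives after the host
        and the current date (see :ref:`borg_placeholders` for the full list)::

            borg create REPO::'{hostname}-{now:%Y-%m-%d}' /home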

        Backup speed is increased by not reprocessing files that are already part of
        existing archives and weren't modified. The detection of unmodified files is
        done by comparing multiple file metadata values with previous values kept in
        the files cache.

        This comparison can operate in different modes as given by ``--files-cache``:

        - ctime,size,inode (default)
        - mtime,size,inode (default behaviour of borg versions older than 1.1.0rc4)
        - ctime,size (ignore the inode number)
        - mtime,size (ignore the inode number)
        - rechunk,ctime (all files are considered modified - rechunk, cache ctime)
        - rechunk,mtime (all files are considered modified - rechunk, cache mtime)
        - disabled (disable the files cache, all files considered modified - rechunk)

        inode number: better safety, but often unstable on network filesystems

        Normally, detecting file modifications will take inode information into
        consideration to improve the reliability of file change detection.
        This is problematic for files located on sshfs and similar network file
        systems which do not provide stable inode numbers; such files will always
        be considered modified. You can use modes without `inode` in this case to
        improve performance, but reliability of change detection might be reduced.
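
        For example, when the backup source is on sshfs or a similar network filesystem
        (an illustrative mount point is assumed here), a mode without the inode number
        avoids needless rechunking::

            borg create --files-cache=ctime,size REPO::ARCHIVE /mnt/sshfs/data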

        ctime vs. mtime: safety vs. speed

        - ctime is a rather safe way to detect changes to a file (metadata and contents)
          as it can not be set from userspace. But a metadata-only change (e.g. a chown
          or chmod) will already update the ctime, so there might be some unnecessary
          chunking/hashing even without content changes. Also, some filesystems do not
          support ctime (change time).
        - mtime usually works and only updates if file contents were changed. But mtime
          can be arbitrarily set from userspace, e.g. to set mtime back to the same value
          it had before a content change happened. This can be used maliciously as well as
          well-meant, but in both cases mtime based cache modes can be problematic.

        The mount points of filesystems or filesystem snapshots should be the same for every
        creation of a new archive to ensure fast operation. This is because the files cache,
        which is used to quickly detect changed files, uses absolute filenames.
        If this is not possible, consider creating a bind mount to a stable location.
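
        For example, if snapshots get mounted under changing names, a bind mount can
        present them under one stable path (illustrative paths)::

            mount --bind /snapshots/home-2024-06-01 /mnt/backup-src
            borg create REPO::ARCHIVE /mnt/backup-src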

        The ``--progress`` option shows (from left to right) Original and (uncompressed)
        deduplicated size (O and U respectively), then the Number of files (N) processed so far,
        followed by the currently processed path.

        When using ``--stats``, you will get some statistics about how much data was
        added - the "This Archive" deduplicated size there is most interesting as that is
        how much your repository will grow. Please note that the "All archives" stats refer to
        the state after creation. Also, the ``--stats`` and ``--dry-run`` options are mutually
        exclusive because the data is not actually compressed and deduplicated during a dry run.

        For more help on include/exclude patterns, see the :ref:`borg_patterns` command output.

        For more help on placeholders, see the :ref:`borg_placeholders` command output.

        .. man NOTES

        The ``--exclude`` patterns are not like tar. In tar ``--exclude`` .bundler/gems will
        exclude foo/.bundler/gems. In borg it will not; you need to use ``--exclude``
        '\\*/.bundler/gems' to get the same effect.

        In addition to using ``--exclude`` patterns, it is possible to use
        ``--exclude-if-present`` to specify the name of a filesystem object (e.g. a file
        or folder name) which, when contained within another folder, will prevent the
        containing folder from being backed up. By default, the containing folder and
        all of its contents will be omitted from the backup. If, however, you wish to
        only include the objects specified by ``--exclude-if-present`` in your backup,
        and not include any other contents of the containing folder, this can be enabled
        through using the ``--keep-exclude-tags`` option.
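
        For example, to skip every directory that contains a ``.nobackup`` tag file,
        while still storing the tag files themselves (an illustrative tag name)::

            borg create --exclude-if-present .nobackup --keep-exclude-tags REPO::ARCHIVE /home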

        The ``-x`` or ``--one-file-system`` option excludes directories that are mountpoints (and everything in them).
        It detects mountpoints by comparing the device number from the output of ``stat()`` of the directory and its
        parent directory. Specifically, it excludes directories for which ``stat()`` reports a device number different
        from the device number of their parent.
        In general: be aware that there are directories whose device number differs from their parent's although the
        kernel does not consider them mountpoints, and also the other way around.
        Linux examples for this are bind mounts (possibly same device number, but always a mountpoint) and ALL
        subvolumes of a btrfs (different device number from parent but not necessarily a mountpoint).
        macOS examples are the apfs mounts of a typical macOS installation.
        Therefore, when using ``--one-file-system``, you should double-check that the backup works as intended.
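
        For example, to archive the root filesystem without descending into other
        mounted filesystems such as /proc, /sys or network mounts::

            borg create --one-file-system REPO::ARCHIVE /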

        .. _list_item_flags:

        Item flags
        ++++++++++

        ``--list`` outputs a list of all files, directories and other
        file system items it considered (no matter whether they had content changes
        or not). For each item, it prefixes a single-letter flag that indicates type
        and/or status of the item.

        If you are interested only in a subset of that output, you can give e.g.
        ``--filter=AME`` and it will only show regular files with A, M or E status (see
        below).

        An uppercase character represents the status of a regular file relative to the
        "files" cache (not relative to the repo -- this is an issue if the files cache
        is not used). Metadata is stored in any case and for 'A' and 'M' also new data
        chunks are stored. For 'U' all data chunks refer to already existing chunks.

        - 'A' = regular file, added (see also :ref:`a_status_oddity` in the FAQ)
        - 'M' = regular file, modified
        - 'U' = regular file, unchanged
        - 'C' = regular file, it changed while we backed it up
        - 'E' = regular file, an error happened while accessing/reading *this* file

        A lowercase character means a file type other than a regular file;
        borg usually just stores their metadata:

        - 'd' = directory
        - 'b' = block device
        - 'c' = char device
        - 'h' = regular file, hardlink (to already seen inodes)
        - 's' = symlink
        - 'f' = fifo

        Other flags used include:

        - '+' = included, item would be backed up (if not in dry-run mode)
        - '-' = excluded, item would not be / was not backed up
        - 'i' = backup data was read from standard input (stdin)
        - '?' = missing status code (if you see this, please file a bug report!)

        Reading backup data from stdin
        ++++++++++++++++++++++++++++++

        There are two methods to read from stdin. Either specify ``-`` as path and
        pipe directly to borg::

            backup-vm --id myvm --stdout | borg create REPO::ARCHIVE -

        Or use ``--content-from-command`` to have Borg manage the execution of the
        command and piping. If you do so, the first PATH argument is interpreted
        as command to execute and any further arguments are treated as arguments
        to the command::

            borg create --content-from-command REPO::ARCHIVE -- backup-vm --id myvm --stdout

        ``--`` is used to ensure ``--id`` and ``--stdout`` are **not** considered
        arguments to ``borg`` but rather ``backup-vm``.

        The difference between the two approaches is that piping to borg creates an
        archive even if the command piping to borg exits with a failure. In this case,
        **one can end up with truncated output being backed up**. Using
        ``--content-from-command``, in contrast, borg is guaranteed to fail without
        creating an archive should the command fail. The command is considered failed
        when it returned a non-zero exit code.

        Reading from stdin yields just a stream of data without file metadata
        associated with it, and the files cache is not needed at all. So it is
        safe to disable it via ``--files-cache disabled`` and speed up backup
        creation a bit.

        By default, the content read from stdin is stored in a file called 'stdin'.
        Use ``--stdin-name`` to change the name.
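
        For example, to store a database dump under a more telling name (the dump
        command is illustrative)::

            mysqldump mydb | borg create --stdin-name mydb.sql --files-cache=disabled REPO::ARCHIVE -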

        Feeding all file paths from an external source
        ++++++++++++++++++++++++++++++++++++++++++++++

        Usually, you give a starting path (recursion root) to borg and then borg
        automatically recurses, finds and backs up all fs objects contained in
        there (optionally considering include/exclude rules).

        If you need more control and you want to give every single fs object path
        to borg (maybe implementing your own recursion or your own rules), you can use
        ``--paths-from-stdin`` or ``--paths-from-command`` (with the latter, borg will
        fail to create an archive should the command fail).

        Borg supports paths with the slashdot hack to strip path prefixes here also.
        So, be careful not to unintentionally trigger that.
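
        For example, paths could be produced by ``find`` and fed NUL-separated into
        borg (an illustrative selection; a ``\\0`` delimiter avoids problems with
        newlines in file names)::

            find /home -name '*.conf' -print0 | borg create --paths-from-stdin --paths-delimiter '\\0' REPO::ARCHIVE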
"""
)
subparser = subparsers.add_parser(
"create",
parents=[common_parser],
add_help=False,
description=self.do_create.__doc__,
epilog=create_epilog,
formatter_class=argparse.RawDescriptionHelpFormatter,
help="create backup",
)
subparser.set_defaults(func=self.do_create)
# note: --dry-run and --stats are mutually exclusive, but we do not want to abort when
# parsing, but rather proceed with the dry-run, but without stats (see run() method).
subparser.add_argument(
"-n", "--dry-run", dest="dry_run", action="store_true", help="do not create a backup archive"
)
subparser.add_argument(
"-s", "--stats", dest="stats", action="store_true", help="print statistics for the created archive"
)
subparser.add_argument(
"--list", dest="output_list", action="store_true", help="output verbose list of items (files, dirs, ...)"
)
subparser.add_argument(
"--filter",
metavar="STATUSCHARS",
dest="output_filter",
action=Highlander,
help="only display items with the given status characters (see description)",
)
subparser.add_argument("--json", action="store_true", help="output stats as JSON. Implies ``--stats``.")
subparser.add_argument(
"--no-cache-sync",
dest="no_cache_sync",
action="store_true",
help="experimental: do not synchronize the cache. Implies not using the files cache.",
)
subparser.add_argument(
"--no-cache-sync-forced",
dest="no_cache_sync_forced",
action="store_true",
help="experimental: do not synchronize the cache (forced). Implies not using the files cache.",
)
subparser.add_argument(
"--stdin-name",
metavar="NAME",
dest="stdin_name",
default="stdin",
action=MakePathSafeAction,
help="use NAME in archive for stdin data (default: %(default)r)",
)
subparser.add_argument(
"--stdin-user",
metavar="USER",
dest="stdin_user",
default=None,
action=Highlander,
help="set user USER in archive for stdin data (default: do not store user/uid)",
)
subparser.add_argument(
"--stdin-group",
metavar="GROUP",
dest="stdin_group",
default=None,
action=Highlander,
help="set group GROUP in archive for stdin data (default: do not store group/gid)",
)
subparser.add_argument(
"--stdin-mode",
metavar="M",
dest="stdin_mode",
type=lambda s: int(s, 8),
default=STDIN_MODE_DEFAULT,
action=Highlander,
help="set mode to M in archive for stdin data (default: %(default)04o)",
)
subparser.add_argument(
"--content-from-command",
action="store_true",
help="interpret PATH as command and store its stdout. See also section Reading from" " stdin below.",
)
subparser.add_argument(
"--paths-from-stdin",
action="store_true",
help="read DELIM-separated list of paths to back up from stdin. All control is external: it will back"
" up all files given - no more, no less.",
)
subparser.add_argument(
"--paths-from-command",
action="store_true",
help="interpret PATH as command and treat its output as ``--paths-from-stdin``",
)
subparser.add_argument(
"--paths-delimiter",
action=Highlander,
metavar="DELIM",
help="set path delimiter for ``--paths-from-stdin`` and ``--paths-from-command`` (default: ``\\n``) ",
)
exclude_group = define_exclusion_group(subparser, tag_files=True)
exclude_group.add_argument(
"--exclude-nodump", dest="exclude_nodump", action="store_true", help="exclude files flagged NODUMP"
)
fs_group = subparser.add_argument_group("Filesystem options")
fs_group.add_argument(
"-x",
"--one-file-system",
dest="one_file_system",
action="store_true",
help="stay in the same file system and do not store mount points of other file systems - "
"this might behave different from your expectations, see the description below.",
)
fs_group.add_argument(
"--numeric-ids",
dest="numeric_ids",
action="store_true",
help="only store numeric user and group identifiers",
)
fs_group.add_argument("--atime", dest="atime", action="store_true", help="do store atime into archive")
fs_group.add_argument("--noctime", dest="noctime", action="store_true", help="do not store ctime into archive")
fs_group.add_argument(
"--nobirthtime",
dest="nobirthtime",
action="store_true",
help="do not store birthtime (creation date) into archive",
)
fs_group.add_argument(
"--noflags",
dest="noflags",
action="store_true",
help="do not read and store flags (e.g. NODUMP, IMMUTABLE) into archive",
)
fs_group.add_argument(
"--noacls", dest="noacls", action="store_true", help="do not read and store ACLs into archive"
)
fs_group.add_argument(
"--noxattrs", dest="noxattrs", action="store_true", help="do not read and store xattrs into archive"
)
fs_group.add_argument(
"--sparse",
dest="sparse",
action="store_true",
help="detect sparse holes in input (supported only by fixed chunker)",
)
fs_group.add_argument(
"--files-cache",
metavar="MODE",
dest="files_cache_mode",
action=Highlander,
type=FilesCacheMode,
default=FILES_CACHE_MODE_UI_DEFAULT,
help="operate files cache in MODE. default: %s" % FILES_CACHE_MODE_UI_DEFAULT,
)
fs_group.add_argument(
"--read-special",
dest="read_special",
action="store_true",
help="open and read block and char device files as well as FIFOs as if they were "
"regular files. Also follows symlinks pointing to these kinds of files.",
)
archive_group = subparser.add_argument_group("Archive options")
archive_group.add_argument(
"--comment",
metavar="COMMENT",
dest="comment",
type=comment_validator,
default="",
action=Highlander,
help="add a comment text to the archive",
)
archive_group.add_argument(
"--timestamp",
metavar="TIMESTAMP",
dest="timestamp",
type=timestamp,
default=None,
action=Highlander,
help="manually specify the archive creation date/time (yyyy-mm-ddThh:mm:ss[(+|-)HH:MM] format, "
"(+|-)HH:MM is the UTC offset, default: local time zone). Alternatively, give a reference file/directory.",
)
archive_group.add_argument(
"-c",
"--checkpoint-interval",
metavar="SECONDS",
dest="checkpoint_interval",
type=int,
default=1800,
action=Highlander,
help="write checkpoint every SECONDS seconds (Default: 1800)",
)
archive_group.add_argument(
"--checkpoint-volume",
metavar="BYTES",
dest="checkpoint_volume",
type=int,
default=0,
action=Highlander,
help="write checkpoint every BYTES bytes (Default: 0, meaning no volume based checkpointing)",
)
archive_group.add_argument(
"--chunker-params",
metavar="PARAMS",
dest="chunker_params",
type=ChunkerParams,
default=CHUNKER_PARAMS,
action=Highlander,
help="specify the chunker parameters (ALGO, CHUNK_MIN_EXP, CHUNK_MAX_EXP, "
"HASH_MASK_BITS, HASH_WINDOW_SIZE). default: %s,%d,%d,%d,%d" % CHUNKER_PARAMS,
)
archive_group.add_argument(
"-C",
"--compression",
metavar="COMPRESSION",
dest="compression",
type=CompressionSpec,
default=CompressionSpec("lz4"),
action=Highlander,
help="select compression algorithm, see the output of the " '"borg help compression" command for details.',
)
subparser.add_argument("name", metavar="NAME", type=archivename_validator, help="specify the archive name")
subparser.add_argument(
"paths", metavar="PATH", nargs="*", type=PathSpec, action="extend", help="paths to archive"
)