Merge pull request #1242 from ThomasWaldmann/fix-1214

fixes for --read-special mode
2025-01-30 19:21:17 +00:00 · 2016-07-04 14:29:34 +02:00 · 2016-07-04 14:29:34 +02:00 · 458edf351b
commit 458edf351b
parent c9597406ba d59a1d5fef
3 changed files with 61 additions and 34 deletions
--- a/borg/archive.py
+++ b/borg/archive.py
@ -46,6 +46,11 @@
 flags_noatime = flags_normal | getattr(os, 'O_NOATIME', 0)
 def is_special(mode):
    """Tell whether *mode* denotes a file type handled specially by --read-special.

    The file types in question are block devices, character devices and
    FIFOs (named pipes).

    :param mode: a file mode as found in ``st_mode`` of a stat result
    :return: True for block device, char device or FIFO modes, False otherwise
    """
    special_type_checks = (stat.S_ISBLK, stat.S_ISCHR, stat.S_ISFIFO)
    return any(matches(mode) for matches in special_type_checks)
 class BackupOSError(Exception):
    """
    Wrapper for OSError raised while accessing backup files.
@ -589,9 +594,16 @@ def process_file(self, path, st, cache, ignore_inode=False):
                return status
            else:
                self.hard_links[st.st_ino, st.st_dev] = safe_path
        is_special_file = is_special(st.st_mode)
        if not is_special_file:
            path_hash = self.key.id_hash(os.path.join(self.cwd, path).encode('utf-8', 'surrogateescape'))
        first_run = not cache.files
            ids = cache.file_known_and_unchanged(path_hash, st, ignore_inode)
        else:
            # in --read-special mode, we may be called for special files.
            # there should be no information in the cache about special files processed in
            # read-special mode, but we better play safe as this was wrong in the past:
            path_hash = ids = None
        first_run = not cache.files
        if first_run:
            logger.debug('Processing files ...')
        chunks = None
@ -616,10 +628,17 @@ def process_file(self, path, st, cache, ignore_inode=False):
                    chunks.append(cache.add_chunk(self.key.id_hash(chunk), chunk, self.stats))
                    if self.show_progress:
                        self.stats.show_progress(item=item, dt=0.2)
            if not is_special_file:
                # we must not memorize special files, because the contents of e.g. a
                # block or char device will change without its mtime/size/inode changing.
                cache.memorize_file(path_hash, st, [c[0] for c in chunks])
            status = status or 'M'  # regular file, modified (if not 'A' already)
        item[b'chunks'] = chunks
        item.update(self.stat_attrs(st, path))
        if is_special_file:
            # we processed a special file like a regular file. reflect that in mode,
            # so it can be extracted / accessed in FUSE mount like a regular file:
            item[b'mode'] = stat.S_IFREG | stat.S_IMODE(item[b'mode'])
        self.stats.nfiles += 1
        self.add_item(item)
        return status
--- a/borg/archiver.py
+++ b/borg/archiver.py
@ -29,7 +29,7 @@
 from .repository import Repository
 from .cache import Cache
 from .key import key_creator, RepoKey, PassphraseKey
-from .archive import backup_io, BackupOSError, Archive, ArchiveChecker, CHUNKER_PARAMS
+from .archive import backup_io, BackupOSError, Archive, ArchiveChecker, CHUNKER_PARAMS, is_special
 from .remote import RepositoryServer, RemoteRepository, cache_if_remote
 has_lchflags = hasattr(os, 'lchflags')
@ -256,15 +256,7 @@ def _process(self, archive, cache, matcher, exclude_caches, exclude_if_present,
            return
        try:
-            # usually, do not follow symlinks (if we have a symlink, we want to
+            st = os.lstat(path)
            # backup it as such).
            # but if we are in --read-special mode, we later process <path> as
            # a regular file (we open and read the symlink target file's content).
            # thus, in read_special mode, we also want to stat the symlink target
            # file, for consistency. if we did not, we also have issues extracting
            # this file, as it would be in the archive as a symlink, not as the
            # target's file type (which could be e.g. a block device).
            st = os.stat(path, follow_symlinks=read_special)
        except OSError as e:
            self.print_warning('%s: %s', path, e)
            return
@ -277,7 +269,7 @@ def _process(self, archive, cache, matcher, exclude_caches, exclude_if_present,
        # Ignore if nodump flag is set
        if has_lchflags and (st.st_flags & stat.UF_NODUMP):
            return
-        if stat.S_ISREG(st.st_mode) or read_special and not stat.S_ISDIR(st.st_mode):
+        if stat.S_ISREG(st.st_mode):
            if not dry_run:
                try:
                    status = archive.process_file(path, st, cache, self.ignore_inode)
@ -309,13 +301,26 @@ def _process(self, archive, cache, matcher, exclude_caches, exclude_if_present,
                                  read_special=read_special, dry_run=dry_run)
        elif stat.S_ISLNK(st.st_mode):
            if not dry_run:
                if not read_special:
                    status = archive.process_symlink(path, st)
                else:
                    st_target = os.stat(path)
                    if is_special(st_target.st_mode):
                        status = archive.process_file(path, st_target, cache)
                    else:
                        status = archive.process_symlink(path, st)
        elif stat.S_ISFIFO(st.st_mode):
            if not dry_run:
                if not read_special:
                    status = archive.process_fifo(path, st)
                else:
                    status = archive.process_file(path, st, cache)
        elif stat.S_ISCHR(st.st_mode) or stat.S_ISBLK(st.st_mode):
            if not dry_run:
                if not read_special:
                    status = archive.process_dev(path, st)
                else:
                    status = archive.process_file(path, st, cache)
        elif stat.S_ISSOCK(st.st_mode):
            # Ignore unix sockets
            return
@ -1134,7 +1139,8 @@ def build_parser(self, args=None, prog=None):
                                    'lzma,0 .. lzma,9 == lzma (with level 0..9).')
        subparser.add_argument('--read-special', dest='read_special',
                               action='store_true', default=False,
-                               help='open and read special files as if they were regular files')
+                               help='open and read block and char device files as well as FIFOs as if they were '
                                    'regular files. Also follows symlinks pointing to these kinds of files.')
        subparser.add_argument('-n', '--dry-run', dest='dry_run',
                               action='store_true', default=False,
                               help='do not create a backup archive')
--- a/docs/usage.rst
+++ b/docs/usage.rst
@ -651,32 +651,34 @@ For more details, see :ref:`chunker_details`.
 --read-special
 ~~~~~~~~~~~~~~
-The option ``--read-special`` is not intended for normal, filesystem-level (full or
+The --read-special option is special - you do not want to use it for normal
-partly-recursive) backups. You only give this option if you want to do something
+full-filesystem backups, but rather after carefully picking some targets for it.
 rather ... special -- and if you have hand-picked some files that you want to treat
 that way.
-``borg create --read-special`` will open all files without doing any special
+The option ``--read-special`` triggers special treatment for block and char
-treatment according to the file type (the only exception here are directories:
+device files as well as FIFOs. Instead of storing them as such a device (or
-they will be recursed into). Just imagine what happens if you do ``cat
+FIFO), they will get opened, their content will be read and in the backup
-filename`` --- the content you will see there is what borg will backup for that
+archive they will show up like a regular file.
 filename.
-So, for example, symlinks will be followed, block device content will be read,
+Symlinks will also get special treatment if (and only if) they point to such
-named pipes / UNIX domain sockets will be read.
+a special file: instead of storing them as a symlink, the target special file
 will get processed as described above.
-You need to be careful with what you give as filename when using ``--read-special``,
+One intended use case of this is backing up the contents of one or multiple
-e.g. if you give ``/dev/zero``, your backup will never terminate.
+block devices, like e.g. LVM snapshots or inactive LVs or disk partitions.
-The given files' metadata is saved as it would be saved without
+You need to be careful about what you include when using ``--read-special``,
-``--read-special`` (e.g. its name, its size [might be 0], its mode, etc.) -- but
+e.g. if you include ``/dev/zero``, your backup will never terminate.
 additionally, also the content read from it will be saved for it.
 Restoring such files' content is currently only supported one at a time via the
 ``--stdout`` option (and you have to redirect stdout to wherever it shall go,
 maybe directly into an existing device file of your choice or indirectly via
 ``dd``).
 To some extent, mounting a backup archive with the backups of special files
 via ``borg mount`` and then loop-mounting the image files from inside the mount
 point will work. If you plan to access a lot of data in there, it will likely
 scale and perform better if you do not work via the FUSE mount.
 Example
 +++++++