diff --git a/src/borg/archive.py b/src/borg/archive.py index 24a7d96e0..87ebf1926 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -196,9 +196,9 @@ def backup_io_iter(iterator): @contextmanager -def OsOpen(path, flags, noatime=False, op='open'): +def OsOpen(*, flags, path=None, parent_fd=None, name=None, noatime=False, op='open'): with backup_io(op): - fd = os_open(path, flags, noatime) + fd = os_open(path=path, parent_fd=parent_fd, name=name, flags=flags, noatime=noatime) try: yield fd finally: @@ -1076,31 +1076,46 @@ def create_helper(self, path, st, status=None, hardlinkable=True): if hardlink_master: self.hard_links[(st.st_ino, st.st_dev)] = safe_path - def process_dir(self, *, path, st): + def process_dir(self, *, path, fd, st): with self.create_helper(path, st, 'd', hardlinkable=False) as (item, status, hardlinked, hardlink_master): - item.update(self.metadata_collector.stat_attrs(st, path)) + item.update(self.metadata_collector.stat_attrs(st, path, fd=fd)) return status - def process_fifo(self, *, path, st): + def process_fifo(self, *, path, parent_fd, name, st): with self.create_helper(path, st, 'f') as (item, status, hardlinked, hardlink_master): # fifo - item.update(self.metadata_collector.stat_attrs(st, path)) - return status + with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_normal, noatime=True) as fd: + with backup_io('fstat'): + curr_st = os.fstat(fd) + # XXX do some checks here: st vs. curr_st + assert stat.S_ISFIFO(curr_st.st_mode) + # make sure stats refer to same object that we are processing below + st = curr_st + item.update(self.metadata_collector.stat_attrs(st, path, fd=fd)) + return status - def process_dev(self, *, path, st, dev_type): + def process_dev(self, *, path, parent_fd, name, st, dev_type): with self.create_helper(path, st, dev_type) as (item, status, hardlinked, hardlink_master): # char/block device - item.rdev = st.st_rdev - item.update(self.metadata_collector.stat_attrs(st, path)) - return status + with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_normal, noatime=True) as fd: + with backup_io('fstat'): + curr_st = os.fstat(fd) + # XXX do some checks here: st vs. curr_st + assert stat.S_ISBLK(curr_st.st_mode) or stat.S_ISCHR(curr_st.st_mode) + # make sure stats refer to same object that we are processing below + st = curr_st + item.rdev = st.st_rdev + item.update(self.metadata_collector.stat_attrs(st, path, fd=fd)) + return status - def process_symlink(self, *, path, st): + def process_symlink(self, *, path, parent_fd, name, st): # note: using hardlinkable=False because we can not support hardlinked symlinks, # due to the dual-use of item.source, see issue #2343: # hardlinked symlinks will be archived [and extracted] as non-hardlinked symlinks. with self.create_helper(path, st, 's', hardlinkable=False) as (item, status, hardlinked, hardlink_master): + fname = name if name is not None and parent_fd is not None else path with backup_io('readlink'): - source = os.readlink(path) + source = os.readlink(fname, dir_fd=parent_fd) item.source = source - item.update(self.metadata_collector.stat_attrs(st, path)) + item.update(self.metadata_collector.stat_attrs(st, path)) # can't use FD here? return status def process_stdin(self, *, path, cache): @@ -1120,9 +1135,9 @@ def process_stdin(self, *, path, cache): self.add_item(item, stats=self.stats) return 'i' # stdin - def process_file(self, *, path, st, cache): + def process_file(self, *, path, parent_fd, name, st, cache): with self.create_helper(path, st, None) as (item, status, hardlinked, hardlink_master): # no status yet - with OsOpen(path, flags_normal, noatime=True) as fd: + with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_normal, noatime=True) as fd: with backup_io('fstat'): curr_st = os.fstat(fd) # XXX do some checks here: st vs. curr_st @@ -1172,7 +1187,7 @@ def process_file(self, *, path, st, cache): # we processed a special file like a regular file. reflect that in mode, # so it can be extracted / accessed in FUSE mount like a regular file: item.mode = stat.S_IFREG | stat.S_IMODE(item.mode) - return status + return status def valid_msgpacked_dict(d, keys_serialized): diff --git a/src/borg/archiver.py b/src/borg/archiver.py index 09b3e213c..5747ed612 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -34,7 +34,7 @@ from . import helpers from .algorithms.checksums import crc32 from .archive import Archive, ArchiveChecker, ArchiveRecreater, Statistics, is_special -from .archive import BackupError, BackupOSError, backup_io +from .archive import BackupError, BackupOSError, backup_io, OsOpen from .archive import FilesystemObjectProcessors, MetadataCollector, ChunksProcessor from .cache import Cache, assert_secure, SecurityManager from .constants import * # NOQA @@ -66,6 +66,7 @@ from .helpers import popen_with_error_handling, prepare_subprocess_env from .helpers import dash_open from .helpers import umount +from .helpers import flags_root, flags_dir from .helpers import msgpack from .nanorst import rst_to_terminal from .patterns import ArgparsePatternAction, ArgparseExcludeFileAction, ArgparsePatternFileAction, parse_exclude_pattern @@ -479,20 +480,23 @@ def create_inner(archive, cache, fso): self.print_file_status(status, path) continue path = os.path.normpath(path) - try: - st = os.stat(path, follow_symlinks=False) - except OSError as e: - self.print_warning('%s: %s', path, e) - continue - if args.one_file_system: - restrict_dev = st.st_dev - else: - restrict_dev = None - self._process(path=path, - fso=fso, cache=cache, matcher=matcher, - exclude_caches=args.exclude_caches, exclude_if_present=args.exclude_if_present, - keep_exclude_tags=args.keep_exclude_tags, skip_inodes=skip_inodes, - restrict_dev=restrict_dev, read_special=args.read_special, dry_run=dry_run) + parent_dir = os.path.dirname(path) or '.' + name = os.path.basename(path) + with OsOpen(path=parent_dir, flags=flags_root, noatime=True, op='open_root') as parent_fd: + try: + st = os.stat(name, dir_fd=parent_fd, follow_symlinks=False) + except OSError as e: + self.print_warning('%s: %s', path, e) + continue + if args.one_file_system: + restrict_dev = st.st_dev + else: + restrict_dev = None + self._process(path=path, parent_fd=parent_fd, name=name, + fso=fso, cache=cache, matcher=matcher, + exclude_caches=args.exclude_caches, exclude_if_present=args.exclude_if_present, + keep_exclude_tags=args.keep_exclude_tags, skip_inodes=skip_inodes, + restrict_dev=restrict_dev, read_special=args.read_special, dry_run=dry_run) if not dry_run: archive.save(comment=args.comment, timestamp=args.timestamp) if args.progress: @@ -544,12 +548,12 @@ def create_inner(archive, cache, fso): create_inner(None, None, None) return self.exit_code - def _process(self, *, path, + def _process(self, *, path, parent_fd=None, name=None, fso, cache, matcher, exclude_caches, exclude_if_present, keep_exclude_tags, skip_inodes, restrict_dev, read_special=False, dry_run=False): """ - Process *path* recursively according to the various parameters. + Process *path* (or, preferably, parent_fd/name) recursively according to the various parameters. This should only raise on critical errors. Per-item errors must be handled within this method. """ @@ -557,7 +561,7 @@ def _process(self, *, path, recurse_excluded_dir = False if matcher.match(path): with backup_io('stat'): - st = os.stat(path, follow_symlinks=False) + st = os.stat(name, dir_fd=parent_fd, follow_symlinks=False) else: self.print_file_status('x', path) # get out here as quickly as possible: @@ -567,7 +571,7 @@ def _process(self, *, path, if not matcher.recurse_dir: return with backup_io('stat'): - st = os.stat(path, follow_symlinks=False) + st = os.stat(name, dir_fd=parent_fd, follow_symlinks=False) recurse_excluded_dir = stat.S_ISDIR(st.st_mode) if not recurse_excluded_dir: return @@ -582,75 +586,85 @@ def _process(self, *, path, if self.exclude_nodump: # Ignore if nodump flag is set with backup_io('flags'): - if get_flags(path, st) & stat.UF_NODUMP: + if get_flags(path=path, st=st) & stat.UF_NODUMP: self.print_file_status('x', path) return if stat.S_ISREG(st.st_mode): if not dry_run: - status = fso.process_file(path=path, st=st, cache=cache) + status = fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st, cache=cache) elif stat.S_ISDIR(st.st_mode): - if recurse: - tag_paths = dir_is_tagged(path, exclude_caches, exclude_if_present) - if tag_paths: - # if we are already recursing in an excluded dir, we do not need to do anything else than - # returning (we do not need to archive or recurse into tagged directories), see #3991: + with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_dir, + noatime=True, op='dir_open') as child_fd: + with backup_io('fstat'): + curr_st = os.fstat(child_fd) + # XXX do some checks here: st vs. curr_st + assert stat.S_ISDIR(curr_st.st_mode) + # make sure stats refer to same object that we are processing below + st = curr_st + if recurse: + tag_names = dir_is_tagged(path, exclude_caches, exclude_if_present) + if tag_names: + # if we are already recursing in an excluded dir, we do not need to do anything else than + # returning (we do not need to archive or recurse into tagged directories), see #3991: + if not recurse_excluded_dir: + if keep_exclude_tags and not dry_run: + fso.process_dir(path=path, fd=child_fd, st=st) + for tag_name in tag_names: + tag_path = os.path.join(path, tag_name) + self._process(path=tag_path, parent_fd=child_fd, name=tag_name, + fso=fso, cache=cache, matcher=matcher, + exclude_caches=exclude_caches, exclude_if_present=exclude_if_present, + keep_exclude_tags=keep_exclude_tags, skip_inodes=skip_inodes, + restrict_dev=restrict_dev, read_special=read_special, dry_run=dry_run) + self.print_file_status('x', path) + return + if not dry_run: if not recurse_excluded_dir: - if keep_exclude_tags and not dry_run: - fso.process_dir(path=path, st=st) - for tag_path in tag_paths: - self._process(path=tag_path, - fso=fso, cache=cache, matcher=matcher, - exclude_caches=exclude_caches, exclude_if_present=exclude_if_present, - keep_exclude_tags=keep_exclude_tags, skip_inodes=skip_inodes, - restrict_dev=restrict_dev, read_special=read_special, dry_run=dry_run) - self.print_file_status('x', path) - return - if not dry_run: - if not recurse_excluded_dir: - status = fso.process_dir(path=path, st=st) - if recurse: - with backup_io('scandir'): - entries = helpers.scandir_inorder(path) - for dirent in entries: - normpath = os.path.normpath(dirent.path) - self._process(path=normpath, - fso=fso, cache=cache, matcher=matcher, - exclude_caches=exclude_caches, exclude_if_present=exclude_if_present, - keep_exclude_tags=keep_exclude_tags, skip_inodes=skip_inodes, - restrict_dev=restrict_dev, read_special=read_special, dry_run=dry_run) + status = fso.process_dir(path=path, fd=child_fd, st=st) + if recurse: + with backup_io('scandir'): + entries = helpers.scandir_inorder(path=path, fd=child_fd) + for dirent in entries: + normpath = os.path.normpath(dirent.path) + self._process(path=normpath, parent_fd=child_fd, name=dirent.name, + fso=fso, cache=cache, matcher=matcher, + exclude_caches=exclude_caches, exclude_if_present=exclude_if_present, + keep_exclude_tags=keep_exclude_tags, skip_inodes=skip_inodes, + restrict_dev=restrict_dev, read_special=read_special, dry_run=dry_run) elif stat.S_ISLNK(st.st_mode): if not dry_run: if not read_special: - status = fso.process_symlink(path=path, st=st) + status = fso.process_symlink(path=path, parent_fd=parent_fd, name=name, st=st) else: try: - st_target = os.stat(path) + st_target = os.stat(name, dir_fd=parent_fd, follow_symlinks=True) except OSError: special = False else: special = is_special(st_target.st_mode) if special: - status = fso.process_file(path=path, st=st_target, cache=cache) + # XXX must FOLLOW symlinks! + status = fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st_target, cache=cache) else: - status = fso.process_symlink(path=path, st=st) + status = fso.process_symlink(path=path, parent_fd=parent_fd, name=name, st=st) elif stat.S_ISFIFO(st.st_mode): if not dry_run: if not read_special: - status = fso.process_fifo(path=path, st=st) + status = fso.process_fifo(path=path, parent_fd=parent_fd, name=name, st=st) else: - status = fso.process_file(path=path, st=st, cache=cache) + status = fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st, cache=cache) elif stat.S_ISCHR(st.st_mode): if not dry_run: if not read_special: - status = fso.process_dev(path=path, st=st, dev_type='c') + status = fso.process_dev(path=path, parent_fd=parent_fd, name=name, st=st, dev_type='c') else: - status = fso.process_file(path=path, st=st, cache=cache) + status = fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st, cache=cache) elif stat.S_ISBLK(st.st_mode): if not dry_run: if not read_special: - status = fso.process_dev(path=path, st=st, dev_type='b') + status = fso.process_dev(path=path, parent_fd=parent_fd, name=name, st=st, dev_type='b') else: - status = fso.process_file(path=path, st=st, cache=cache) + status = fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st, cache=cache) elif stat.S_ISSOCK(st.st_mode): # Ignore unix sockets return diff --git a/src/borg/helpers/fs.py b/src/borg/helpers/fs.py index 93852c5bc..6c9d9556f 100644 --- a/src/borg/helpers/fs.py +++ b/src/borg/helpers/fs.py @@ -15,6 +15,9 @@ logger = create_logger() +py_37_plus = sys.version_info >= (3, 7) + + def get_base_dir(): """Get home directory / base directory for borg: @@ -103,18 +106,19 @@ def dir_is_cachedir(path): def dir_is_tagged(path, exclude_caches, exclude_if_present): """Determines whether the specified path is excluded by being a cache directory or containing user-specified tag files/directories. Returns a - list of the paths of the tag files/directories (either CACHEDIR.TAG or the + list of the names of the tag files/directories (either CACHEDIR.TAG or the matching user-specified files/directories). """ - tag_paths = [] + # TODO: do operations based on the directory fd + tag_names = [] if exclude_caches and dir_is_cachedir(path): - tag_paths.append(os.path.join(path, CACHE_TAG_NAME)) + tag_names.append(CACHE_TAG_NAME) if exclude_if_present is not None: for tag in exclude_if_present: tag_path = os.path.join(path, tag) if os.path.exists(tag_path): - tag_paths.append(tag_path) - return tag_paths + tag_names.append(tag) + return tag_names _safe_re = re.compile(r'^((\.\.)?/+)+') @@ -144,8 +148,10 @@ def scandir_keyfunc(dirent): return (1, dirent.name) -def scandir_inorder(path='.'): - return sorted(os.scandir(path), key=scandir_keyfunc) +def scandir_inorder(*, path, fd=None): + # py37+ supports giving a fd instead of a path + arg = fd if fd is not None and py_37_plus else path + return sorted(os.scandir(arg), key=scandir_keyfunc) def secure_erase(path): @@ -199,23 +205,39 @@ def O_(*flags): flags_base = O_('BINARY', 'NONBLOCK', 'NOCTTY') # later: add 'NOFOLLOW' flags_normal = flags_base | O_('RDONLY') flags_noatime = flags_normal | O_('NOATIME') +flags_root = O_('RDONLY') +flags_dir = O_('DIRECTORY', 'RDONLY', 'NOFOLLOW') -def os_open(path, flags, noatime=False): +def os_open(*, flags, path=None, parent_fd=None, name=None, noatime=False): + """ + Use os.open to open a fs item. + + If parent_fd and name are given, they are preferred and openat will be used, + path is not used in this case. + + :param path: full (but not necessarily absolute) path + :param parent_fd: open directory file descriptor + :param name: name relative to parent_fd + :param flags: open flags for os.open() (int) + :param noatime: True if access time shall be preserved + :return: file descriptor + """ + fname = name if name is not None and parent_fd is not None else path _flags_normal = flags if noatime: _flags_noatime = _flags_normal | O_('NOATIME') try: # if we have O_NOATIME, this likely will succeed if we are root or owner of file: - fd = os.open(path, _flags_noatime) + fd = os.open(fname, _flags_noatime, dir_fd=parent_fd) except PermissionError: if _flags_noatime == _flags_normal: # we do not have O_NOATIME, no need to try again: raise # Was this EPERM due to the O_NOATIME flag? Try again without it: - fd = os.open(path, _flags_normal) + fd = os.open(fname, _flags_normal, dir_fd=parent_fd) else: - fd = os.open(path, _flags_normal) + fd = os.open(fname, _flags_normal, dir_fd=parent_fd) return fd