From 34f6cfcd81f765c4e2fc15286fa08ccf5df4df5e Mon Sep 17 00:00:00 2001 From: Lapinot Date: Sun, 15 Nov 2020 15:31:01 +0100 Subject: [PATCH] Outsource recursive directory walking (#5492) Split recursive directory walking/processing into walking and item processing. --- src/borg/archive.py | 15 +++- src/borg/archiver.py | 178 ++++++++++++++++++++++--------------------- 2 files changed, 105 insertions(+), 88 deletions(-) diff --git a/src/borg/archive.py b/src/borg/archive.py index 65b9919e6..fa0c7d7e6 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -38,7 +38,7 @@ from .helpers import bin_to_hex from .helpers import safe_ns from .helpers import ellipsis_truncate, ProgressIndicatorPercent, log_multi -from .helpers import os_open, flags_normal +from .helpers import os_open, flags_normal, flags_dir from .helpers import msgpack from .helpers import sig_int from .patterns import PathPrefixPattern, FnmatchPattern, IECommand @@ -1200,11 +1200,22 @@ def create_helper(self, path, st, status=None, hardlinkable=True): if hardlink_master: self.hard_links[(st.st_ino, st.st_dev)] = safe_path - def process_dir(self, *, path, fd, st): + def process_dir_with_fd(self, *, path, fd, st): with self.create_helper(path, st, 'd', hardlinkable=False) as (item, status, hardlinked, hardlink_master): item.update(self.metadata_collector.stat_attrs(st, path, fd=fd)) return status + def process_dir(self, *, path, parent_fd, name, st): + with self.create_helper(path, st, 'd', hardlinkable=False) as (item, status, hardlinked, hardlink_master): + with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_dir, + noatime=True, op='dir_open') as fd: + # fd is None for directories on windows, in that case a race condition check is not possible. + if fd is not None: + with backup_io('fstat'): + st = stat_update_check(st, os.fstat(fd)) + item.update(self.metadata_collector.stat_attrs(st, path, fd=fd)) + return status + def process_fifo(self, *, path, parent_fd, name, st): with self.create_helper(path, st, 'f') as (item, status, hardlinked, hardlink_master): # fifo with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_normal, noatime=True) as fd: diff --git a/src/borg/archiver.py b/src/borg/archiver.py index a5bc3bff9..14fd3057f 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -565,11 +565,11 @@ def create_inner(archive, cache, fso): restrict_dev = st.st_dev else: restrict_dev = None - self._process(path=path, parent_fd=parent_fd, name=name, - fso=fso, cache=cache, matcher=matcher, - exclude_caches=args.exclude_caches, exclude_if_present=args.exclude_if_present, - keep_exclude_tags=args.keep_exclude_tags, skip_inodes=skip_inodes, - restrict_dev=restrict_dev, read_special=args.read_special, dry_run=dry_run) + self._rec_walk(path=path, parent_fd=parent_fd, name=name, + fso=fso, cache=cache, matcher=matcher, + exclude_caches=args.exclude_caches, exclude_if_present=args.exclude_if_present, + keep_exclude_tags=args.keep_exclude_tags, skip_inodes=skip_inodes, + restrict_dev=restrict_dev, read_special=args.read_special, dry_run=dry_run) if not dry_run: if args.progress: archive.stats.show_progress(final=True) @@ -626,10 +626,66 @@ def create_inner(archive, cache, fso): create_inner(None, None, None) return self.exit_code - def _process(self, *, path, parent_fd=None, name=None, - fso, cache, matcher, - exclude_caches, exclude_if_present, keep_exclude_tags, skip_inodes, - restrict_dev, read_special=False, dry_run=False): + def _process_any(self, *, path, parent_fd, name, st, fso, cache, read_special, dry_run): + """ + Call the right method on the given FilesystemObjectProcessor. + """ + + if dry_run: + return '-' + elif stat.S_ISREG(st.st_mode): + return fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st, cache=cache) + elif stat.S_ISDIR(st.st_mode): + return fso.process_dir(path=path, parent_fd=parent_fd, name=name, st=st) + elif stat.S_ISLNK(st.st_mode): + if not read_special: + return fso.process_symlink(path=path, parent_fd=parent_fd, name=name, st=st) + else: + try: + st_target = os.stat(name, dir_fd=parent_fd, follow_symlinks=True) + except OSError: + special = False + else: + special = is_special(st_target.st_mode) + if special: + return fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st_target, + cache=cache, flags=flags_special_follow) + else: + return fso.process_symlink(path=path, parent_fd=parent_fd, name=name, st=st) + elif stat.S_ISFIFO(st.st_mode): + if not read_special: + return fso.process_fifo(path=path, parent_fd=parent_fd, name=name, st=st) + else: + return fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st, + cache=cache, flags=flags_special) + elif stat.S_ISCHR(st.st_mode): + if not read_special: + return fso.process_dev(path=path, parent_fd=parent_fd, name=name, st=st, dev_type='c') + else: + return fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st, + cache=cache, flags=flags_special) + elif stat.S_ISBLK(st.st_mode): + if not read_special: + return fso.process_dev(path=path, parent_fd=parent_fd, name=name, st=st, dev_type='b') + else: + return fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st, + cache=cache, flags=flags_special) + elif stat.S_ISSOCK(st.st_mode): + # Ignore unix sockets + return + elif stat.S_ISDOOR(st.st_mode): + # Ignore Solaris doors + return + elif stat.S_ISPORT(st.st_mode): + # Ignore Solaris event ports + return + else: + self.print_warning('Unknown file type: %s', path) + return + + def _rec_walk(self, *, path, parent_fd, name, fso, cache, matcher, + exclude_caches, exclude_if_present, keep_exclude_tags, + skip_inodes, restrict_dev, read_special, dry_run): """ Process *path* (or, preferably, parent_fd/name) recursively according to the various parameters. @@ -639,6 +695,7 @@ def _process(self, *, path, parent_fd=None, name=None, # the user says "get out of here!" and we have already completed the desired action. return + status = None try: recurse_excluded_dir = False if matcher.match(path): @@ -652,10 +709,10 @@ def _process(self, *, path, parent_fd=None, name=None, # could trigger an error, e.g. if access is forbidden, see #3209. if not matcher.recurse_dir: return + recurse_excluded_dir = True with backup_io('stat'): st = os_stat(path=path, parent_fd=parent_fd, name=name, follow_symlinks=False) - recurse_excluded_dir = stat.S_ISDIR(st.st_mode) - if not recurse_excluded_dir: + if not stat.S_ISDIR(st.st_mode): return if (st.st_ino, st.st_dev) in skip_inodes: @@ -664,17 +721,20 @@ def _process(self, *, path, parent_fd=None, name=None, # but we WILL save the mountpoint directory (or more precise: the root # directory of the mounted filesystem that shadows the mountpoint dir). recurse = restrict_dev is None or st.st_dev == restrict_dev - status = None + if self.exclude_nodump: # Ignore if nodump flag is set with backup_io('flags'): if get_flags(path=path, st=st) & stat.UF_NODUMP: self.print_file_status('x', path) return - if stat.S_ISREG(st.st_mode): - if not dry_run: - status = fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st, cache=cache) - elif stat.S_ISDIR(st.st_mode): + + if not stat.S_ISDIR(st.st_mode): + # directories cannot go in this branch because they can be excluded based on tag + # files they might contain + status = self._process_any(path=path, parent_fd=parent_fd, name=name, st=st, fso=fso, cache=cache, + read_special=read_special, dry_run=dry_run) + else: with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_dir, noatime=True, op='dir_open') as child_fd: # child_fd is None for directories on windows, in that case a race condition check is not possible. @@ -688,90 +748,36 @@ def _process(self, *, path, parent_fd=None, name=None, # returning (we do not need to archive or recurse into tagged directories), see #3991: if not recurse_excluded_dir: if keep_exclude_tags and not dry_run: - fso.process_dir(path=path, fd=child_fd, st=st) + fso.process_dir_with_fd(path=path, fd=child_fd, st=st) for tag_name in tag_names: tag_path = os.path.join(path, tag_name) - self._process(path=tag_path, parent_fd=child_fd, name=tag_name, - fso=fso, cache=cache, matcher=matcher, - exclude_caches=exclude_caches, exclude_if_present=exclude_if_present, - keep_exclude_tags=keep_exclude_tags, skip_inodes=skip_inodes, - restrict_dev=restrict_dev, read_special=read_special, dry_run=dry_run) + self._rec_walk( + path=tag_path, parent_fd=child_fd, name=tag_name, fso=fso, cache=cache, + matcher=matcher, exclude_caches=exclude_caches, exclude_if_present=exclude_if_present, + keep_exclude_tags=keep_exclude_tags, skip_inodes=skip_inodes, + restrict_dev=restrict_dev, read_special=read_special, dry_run=dry_run) self.print_file_status('x', path) return - if not dry_run: - if not recurse_excluded_dir: - status = fso.process_dir(path=path, fd=child_fd, st=st) + if not recurse_excluded_dir and not dry_run: + status = fso.process_dir_with_fd(path=path, fd=child_fd, st=st) if recurse: with backup_io('scandir'): entries = helpers.scandir_inorder(path=path, fd=child_fd) for dirent in entries: normpath = os.path.normpath(os.path.join(path, dirent.name)) - self._process(path=normpath, parent_fd=child_fd, name=dirent.name, - fso=fso, cache=cache, matcher=matcher, - exclude_caches=exclude_caches, exclude_if_present=exclude_if_present, - keep_exclude_tags=keep_exclude_tags, skip_inodes=skip_inodes, - restrict_dev=restrict_dev, read_special=read_special, dry_run=dry_run) - elif stat.S_ISLNK(st.st_mode): - if not dry_run: - if not read_special: - status = fso.process_symlink(path=path, parent_fd=parent_fd, name=name, st=st) - else: - try: - st_target = os.stat(name, dir_fd=parent_fd, follow_symlinks=True) - except OSError: - special = False - else: - special = is_special(st_target.st_mode) - if special: - status = fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st_target, - cache=cache, flags=flags_special_follow) - else: - status = fso.process_symlink(path=path, parent_fd=parent_fd, name=name, st=st) - elif stat.S_ISFIFO(st.st_mode): - if not dry_run: - if not read_special: - status = fso.process_fifo(path=path, parent_fd=parent_fd, name=name, st=st) - else: - status = fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st, - cache=cache, flags=flags_special) - elif stat.S_ISCHR(st.st_mode): - if not dry_run: - if not read_special: - status = fso.process_dev(path=path, parent_fd=parent_fd, name=name, st=st, dev_type='c') - else: - status = fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st, - cache=cache, flags=flags_special) - elif stat.S_ISBLK(st.st_mode): - if not dry_run: - if not read_special: - status = fso.process_dev(path=path, parent_fd=parent_fd, name=name, st=st, dev_type='b') - else: - status = fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st, - cache=cache, flags=flags_special) - elif stat.S_ISSOCK(st.st_mode): - # Ignore unix sockets - return - elif stat.S_ISDOOR(st.st_mode): - # Ignore Solaris doors - return - elif stat.S_ISPORT(st.st_mode): - # Ignore Solaris event ports - return - else: - self.print_warning('Unknown file type: %s', path) - return + self._rec_walk( + path=normpath, parent_fd=child_fd, name=dirent.name, fso=fso, cache=cache, matcher=matcher, + exclude_caches=exclude_caches, exclude_if_present=exclude_if_present, + keep_exclude_tags=keep_exclude_tags, skip_inodes=skip_inodes, restrict_dev=restrict_dev, + read_special=read_special, dry_run=dry_run) + except (BackupOSError, BackupError) as e: self.print_warning('%s: %s', path, e) status = 'E' if status == 'C': self.print_warning('%s: file changed while we backed it up', path) - # Status output if status is None: - if not dry_run: - status = '?' # need to add a status code somewhere - else: - status = '-' # dry run, item was not backed up - + status = '?' # need to add a status code somewhere if not recurse_excluded_dir: self.print_file_status(status, path) @@ -4592,7 +4598,7 @@ def sig_info_handler(sig_no, stack): # pragma: no cover with signal_handler(sig_no, signal.SIG_IGN): for frame in inspect.getouterframes(stack): func, loc = frame[3], frame[0].f_locals - if func in ('process_file', '_process', ): # create op + if func in ('process_file', '_rec_walk', ): # create op path = loc['path'] try: pos = loc['fd'].tell()