diff --git a/docs/usage/create.rst b/docs/usage/create.rst index f2ff4966..0bec42ff 100644 --- a/docs/usage/create.rst +++ b/docs/usage/create.rst @@ -10,6 +10,9 @@ Examples # same, but list all files as we process them $ borg create --list my-documents ~/Documents + # Backup /mnt/disk/docs, but strip path prefix using the slashdot hack + $ borg create /path/to/repo::docs /mnt/disk/./docs + # Backup ~/Documents and ~/src but exclude pyc files $ borg create my-files \ ~/Documents \ diff --git a/src/borg/archive.py b/src/borg/archive.py index d4449ec4..8aee6847 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -1361,7 +1361,16 @@ class FilesystemObjectProcessors: self.chunker = get_chunker(*chunker_params, seed=key.chunk_seed, sparse=sparse) @contextmanager - def create_helper(self, path, st, status=None, hardlinkable=True): + def create_helper(self, path, st, status=None, hardlinkable=True, strip_prefix=None): + if strip_prefix is not None: + assert not path.endswith(os.sep) + if strip_prefix.startswith(path + os.sep): + # still on a directory level that shall be stripped - do not create an item for this! 
+ yield None, "x", False, None + return + # adjust path, remove stripped directory levels + path = path.removeprefix(strip_prefix) + sanitized_path = remove_dotdot_prefixes(path) item = Item(path=sanitized_path) hardlinked = hardlinkable and st.st_nlink > 1 @@ -1384,13 +1393,26 @@ class FilesystemObjectProcessors: chunks = item.chunks if "chunks" in item else None self.hlm.remember(id=(st.st_ino, st.st_dev), info=chunks) - def process_dir_with_fd(self, *, path, fd, st): - with self.create_helper(path, st, "d", hardlinkable=False) as (item, status, hardlinked, hl_chunks): - item.update(self.metadata_collector.stat_attrs(st, path, fd=fd)) + def process_dir_with_fd(self, *, path, fd, st, strip_prefix): + with self.create_helper(path, st, "d", hardlinkable=False, strip_prefix=strip_prefix) as ( + item, + status, + hardlinked, + hl_chunks, + ): + if item is not None: + item.update(self.metadata_collector.stat_attrs(st, path, fd=fd)) return status - def process_dir(self, *, path, parent_fd, name, st): - with self.create_helper(path, st, "d", hardlinkable=False) as (item, status, hardlinked, hl_chunks): + def process_dir(self, *, path, parent_fd, name, st, strip_prefix): + with self.create_helper(path, st, "d", hardlinkable=False, strip_prefix=strip_prefix) as ( + item, + status, + hardlinked, + hl_chunks, + ): + if item is None: + return status with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_dir, noatime=True, op="dir_open") as fd: # fd is None for directories on windows, in that case a race condition check is not possible. 
if fd is not None: @@ -1399,25 +1421,46 @@ class FilesystemObjectProcessors: item.update(self.metadata_collector.stat_attrs(st, path, fd=fd)) return status - def process_fifo(self, *, path, parent_fd, name, st): - with self.create_helper(path, st, "f") as (item, status, hardlinked, hl_chunks): # fifo + def process_fifo(self, *, path, parent_fd, name, st, strip_prefix): + with self.create_helper(path, st, "f", strip_prefix=strip_prefix) as ( + item, + status, + hardlinked, + hl_chunks, + ): # fifo + if item is None: + return status with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_normal, noatime=True) as fd: with backup_io("fstat"): st = stat_update_check(st, os.fstat(fd)) item.update(self.metadata_collector.stat_attrs(st, path, fd=fd)) return status - def process_dev(self, *, path, parent_fd, name, st, dev_type): - with self.create_helper(path, st, dev_type) as (item, status, hardlinked, hl_chunks): # char/block device + def process_dev(self, *, path, parent_fd, name, st, dev_type, strip_prefix): + with self.create_helper(path, st, dev_type, strip_prefix=strip_prefix) as ( + item, + status, + hardlinked, + hl_chunks, + ): # char/block device # looks like we can not work fd-based here without causing issues when trying to open/close the device + if item is None: + return status with backup_io("stat"): st = stat_update_check(st, os_stat(path=path, parent_fd=parent_fd, name=name, follow_symlinks=False)) item.rdev = st.st_rdev item.update(self.metadata_collector.stat_attrs(st, path)) return status - def process_symlink(self, *, path, parent_fd, name, st): - with self.create_helper(path, st, "s", hardlinkable=True) as (item, status, hardlinked, hl_chunks): + def process_symlink(self, *, path, parent_fd, name, st, strip_prefix): + with self.create_helper(path, st, "s", hardlinkable=True, strip_prefix=strip_prefix) as ( + item, + status, + hardlinked, + hl_chunks, + ): + if item is None: + return status fname = name if name is not None and parent_fd is 
not None else path with backup_io("readlink"): target = os.readlink(fname, dir_fd=parent_fd) @@ -1466,8 +1509,15 @@ class FilesystemObjectProcessors: self.add_item(item, stats=self.stats) return status - def process_file(self, *, path, parent_fd, name, st, cache, flags=flags_normal, last_try=False): - with self.create_helper(path, st, None) as (item, status, hardlinked, hl_chunks): # no status yet + def process_file(self, *, path, parent_fd, name, st, cache, flags=flags_normal, last_try=False, strip_prefix): + with self.create_helper(path, st, None, strip_prefix=strip_prefix) as ( + item, + status, + hardlinked, + hl_chunks, + ): # no status yet + if item is None: + return status with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags, noatime=True) as fd: with backup_io("fstat"): st = stat_update_check(st, os.fstat(fd)) diff --git a/src/borg/archiver/create_cmd.py b/src/borg/archiver/create_cmd.py index 62b6fab0..5b0547ca 100644 --- a/src/borg/archiver/create_cmd.py +++ b/src/borg/archiver/create_cmd.py @@ -20,7 +20,7 @@ from ..helpers import comment_validator, ChunkerParams, PathSpec from ..helpers import archivename_validator, FilesCacheMode from ..helpers import eval_escapes from ..helpers import timestamp, archive_ts_now -from ..helpers import get_cache_dir, os_stat +from ..helpers import get_cache_dir, os_stat, get_strip_prefix from ..helpers import dir_is_tagged from ..helpers import log_multi from ..helpers import basic_json_data, json_print @@ -107,6 +107,7 @@ class CreateMixIn: pipe_bin = sys.stdin.buffer pipe = TextIOWrapper(pipe_bin, errors="surrogateescape") for path in iter_separated(pipe, paths_sep): + strip_prefix = get_strip_prefix(path) path = os.path.normpath(path) try: with backup_io("stat"): @@ -120,6 +121,7 @@ class CreateMixIn: cache=cache, read_special=args.read_special, dry_run=dry_run, + strip_prefix=strip_prefix, ) except BackupError as e: self.print_warning_instance(BackupWarning(path, e)) @@ -157,6 +159,8 @@ class 
CreateMixIn: if not dry_run and status is not None: fso.stats.files_stats[status] += 1 continue + + strip_prefix = get_strip_prefix(path) path = os.path.normpath(path) try: with backup_io("stat"): @@ -176,6 +180,7 @@ class CreateMixIn: restrict_dev=restrict_dev, read_special=args.read_special, dry_run=dry_run, + strip_prefix=strip_prefix, ) # if we get back here, we've finished recursing into <path>, # we do not ever want to get back in there (even if path is given twice as recursion root) @@ -274,7 +279,7 @@ class CreateMixIn: else: create_inner(None, None, None) - def _process_any(self, *, path, parent_fd, name, st, fso, cache, read_special, dry_run): + def _process_any(self, *, path, parent_fd, name, st, fso, cache, read_special, dry_run, strip_prefix): """ Call the right method on the given FilesystemObjectProcessor. """ @@ -287,13 +292,21 @@ class CreateMixIn: try: if stat.S_ISREG(st.st_mode): return fso.process_file( - path=path, parent_fd=parent_fd, name=name, st=st, cache=cache, last_try=last_try + path=path, + parent_fd=parent_fd, + name=name, + st=st, + cache=cache, + last_try=last_try, + strip_prefix=strip_prefix, ) elif stat.S_ISDIR(st.st_mode): - return fso.process_dir(path=path, parent_fd=parent_fd, name=name, st=st) + return fso.process_dir(path=path, parent_fd=parent_fd, name=name, st=st, strip_prefix=strip_prefix) elif stat.S_ISLNK(st.st_mode): if not read_special: - return fso.process_symlink(path=path, parent_fd=parent_fd, name=name, st=st) + return fso.process_symlink( + path=path, parent_fd=parent_fd, name=name, st=st, strip_prefix=strip_prefix + ) else: try: st_target = os_stat(path=path, parent_fd=parent_fd, name=name, follow_symlinks=True) @@ -310,12 +323,17 @@ class CreateMixIn: cache=cache, flags=flags_special_follow, last_try=last_try, + strip_prefix=strip_prefix, ) else: - return fso.process_symlink(path=path, parent_fd=parent_fd, name=name, st=st) + return fso.process_symlink( + path=path, parent_fd=parent_fd, name=name, st=st, 
strip_prefix=strip_prefix + ) elif stat.S_ISFIFO(st.st_mode): if not read_special: - return fso.process_fifo(path=path, parent_fd=parent_fd, name=name, st=st) + return fso.process_fifo( + path=path, parent_fd=parent_fd, name=name, st=st, strip_prefix=strip_prefix + ) else: return fso.process_file( path=path, @@ -325,10 +343,13 @@ class CreateMixIn: cache=cache, flags=flags_special, last_try=last_try, + strip_prefix=strip_prefix, ) elif stat.S_ISCHR(st.st_mode): if not read_special: - return fso.process_dev(path=path, parent_fd=parent_fd, name=name, st=st, dev_type="c") + return fso.process_dev( + path=path, parent_fd=parent_fd, name=name, st=st, dev_type="c", strip_prefix=strip_prefix + ) else: return fso.process_file( path=path, @@ -338,10 +359,13 @@ class CreateMixIn: cache=cache, flags=flags_special, last_try=last_try, + strip_prefix=strip_prefix, ) elif stat.S_ISBLK(st.st_mode): if not read_special: - return fso.process_dev(path=path, parent_fd=parent_fd, name=name, st=st, dev_type="b") + return fso.process_dev( + path=path, parent_fd=parent_fd, name=name, st=st, dev_type="b", strip_prefix=strip_prefix + ) else: return fso.process_file( path=path, @@ -351,6 +375,7 @@ class CreateMixIn: cache=cache, flags=flags_special, last_try=last_try, + strip_prefix=strip_prefix, ) elif stat.S_ISSOCK(st.st_mode): # Ignore unix sockets @@ -401,6 +426,7 @@ class CreateMixIn: restrict_dev, read_special, dry_run, + strip_prefix, ): """ Process *path* (or, preferably, parent_fd/name) recursively according to the various parameters. 
@@ -457,6 +483,7 @@ class CreateMixIn: cache=cache, read_special=read_special, dry_run=dry_run, + strip_prefix=strip_prefix, ) else: with OsOpen( @@ -474,7 +501,9 @@ class CreateMixIn: if not recurse_excluded_dir: if keep_exclude_tags: if not dry_run: - fso.process_dir_with_fd(path=path, fd=child_fd, st=st) + fso.process_dir_with_fd( + path=path, fd=child_fd, st=st, strip_prefix=strip_prefix + ) for tag_name in tag_names: tag_path = os.path.join(path, tag_name) self._rec_walk( @@ -491,12 +520,13 @@ class CreateMixIn: restrict_dev=restrict_dev, read_special=read_special, dry_run=dry_run, + strip_prefix=strip_prefix, ) self.print_file_status("-", path) # excluded return if not recurse_excluded_dir: if not dry_run: - status = fso.process_dir_with_fd(path=path, fd=child_fd, st=st) + status = fso.process_dir_with_fd(path=path, fd=child_fd, st=st, strip_prefix=strip_prefix) else: status = "+" # included (dir) if recurse: @@ -518,6 +548,7 @@ class CreateMixIn: restrict_dev=restrict_dev, read_special=read_special, dry_run=dry_run, + strip_prefix=strip_prefix, ) except BackupError as e: @@ -541,6 +572,11 @@ class CreateMixIn: that means if relative paths are desired, the command has to be run from the correct directory. + The slashdot hack in paths (recursion roots) is triggered by using ``/./``: + ``/this/gets/stripped/./this/gets/archived`` means to process that fs object, but + strip the prefix on the left side of ``./`` from the archived items (in this case, + ``this/gets/archived`` will be the path in the archived item). + When giving '-' as path, borg will read data from standard input and create a file 'stdin' in the created archive from that data. In some cases it's more appropriate to use --content-from-command, however. See section *Reading from @@ -680,8 +716,8 @@ class CreateMixIn: - 'i' = backup data was read from standard input (stdin) - '?' = missing status code (if you see this, please file a bug report!) 
- Reading from stdin - ++++++++++++++++++ + Reading backup data from stdin + ++++++++++++++++++++++++++++++ There are two methods to read from stdin. Either specify ``-`` as path and pipe directly to borg:: @@ -712,6 +748,21 @@ class CreateMixIn: By default, the content read from stdin is stored in a file called 'stdin'. Use ``--stdin-name`` to change the name. + + Feeding all file paths externally + +++++++++++++++++++++++++++++++++ + + Usually, you give a starting path (recursion root) to borg and then borg + automatically recurses, finds and backs up all fs objects contained in + there (optionally considering include/exclude rules). + + If you need more control and you want to give every single fs object path + to borg (maybe implementing your own recursion or your own rules), you can use + ``--paths-from-stdin`` or ``--paths-from-command`` (with the latter, borg will + fail to create an archive should the command fail). + + Borg supports paths with the slashdot hack to strip path prefixes here also. + So, be careful not to unintentionally trigger that. 
""" ) diff --git a/src/borg/helpers/__init__.py b/src/borg/helpers/__init__.py index 82364613..53555e7e 100644 --- a/src/borg/helpers/__init__.py +++ b/src/borg/helpers/__init__.py @@ -20,7 +20,7 @@ from .errors import BackupPermissionError, BackupIOError, BackupFileNotFoundErro from .fs import ensure_dir, join_base_dir, get_socket_filename from .fs import get_security_dir, get_keys_dir, get_base_dir, get_cache_dir, get_config_dir, get_runtime_dir from .fs import dir_is_tagged, dir_is_cachedir, remove_dotdot_prefixes, make_path_safe, scandir_inorder -from .fs import secure_erase, safe_unlink, dash_open, os_open, os_stat, umount +from .fs import secure_erase, safe_unlink, dash_open, os_open, os_stat, get_strip_prefix, umount from .fs import O_, flags_dir, flags_special_follow, flags_special, flags_base, flags_normal, flags_noatime from .fs import HardLinkManager from .misc import sysinfo, log_multi, consume diff --git a/src/borg/helpers/fs.py b/src/borg/helpers/fs.py index 97939f89..f422fc25 100644 --- a/src/borg/helpers/fs.py +++ b/src/borg/helpers/fs.py @@ -233,6 +233,21 @@ def make_path_safe(path): return path +def get_strip_prefix(path): + # similar to how rsync does it, we allow users to give paths like: + # /this/gets/stripped/./this/is/kept + # the whole path is what is used to read from the fs, + # the strip_prefix will be /this/gets/stripped/ and + # this/is/kept is the path being archived. + pos = path.find("/./") # detect slashdot hack + if pos > 0: + # found a prefix to strip! make sure it ends with one "/"! + return os.path.normpath(path[:pos]) + os.sep + else: + # no or empty prefix, nothing to strip! 
+ return None + + _dotdot_re = re.compile(r"^(\.\./)+") diff --git a/src/borg/testsuite/archiver/create_cmd.py b/src/borg/testsuite/archiver/create_cmd.py index d536c435..a064f463 100644 --- a/src/borg/testsuite/archiver/create_cmd.py +++ b/src/borg/testsuite/archiver/create_cmd.py @@ -908,6 +908,32 @@ def test_create_read_special_broken_symlink(archivers, request): assert "input/link -> somewhere does not exist" in output +def test_create_dotslash_hack(archivers, request): + archiver = request.getfixturevalue(archivers) + os.makedirs(os.path.join(archiver.input_path, "first", "secondA", "thirdA")) + os.makedirs(os.path.join(archiver.input_path, "first", "secondB", "thirdB")) + cmd(archiver, "rcreate", RK_ENCRYPTION) + cmd(archiver, "create", "test", "input/first/./") # hack! + output = cmd(archiver, "list", "test") + # dir levels left of slashdot (= input, first) not in archive: + assert "input" not in output + assert "input/first" not in output + assert "input/first/secondA" not in output + assert "input/first/secondA/thirdA" not in output + assert "input/first/secondB" not in output + assert "input/first/secondB/thirdB" not in output + assert "first" not in output + assert "first/secondA" not in output + assert "first/secondA/thirdA" not in output + assert "first/secondB" not in output + assert "first/secondB/thirdB" not in output + # dir levels right of slashdot are in archive: + assert "secondA" in output + assert "secondA/thirdA" in output + assert "secondB" in output + assert "secondB/thirdB" in output + + def test_log_json(archivers, request): archiver = request.getfixturevalue(archivers) create_test_files(archiver.input_path)