Merge pull request #8107 from ThomasWaldmann/slashdot-hack-master

slashdot hack (master)
TW 2024-02-20 12:56:40 +01:00 committed by GitHub
commit 514cef5a63
6 changed files with 173 additions and 28 deletions

View File

@@ -10,6 +10,9 @@ Examples
# same, but list all files as we process them
$ borg create --list my-documents ~/Documents
# Backup /mnt/disk/docs, but strip path prefix using the slashdot hack
$ borg create docs /mnt/disk/./docs
# Backup ~/Documents and ~/src but exclude pyc files
$ borg create my-files \
~/Documents \

View File

@@ -1361,7 +1361,16 @@ class FilesystemObjectProcessors:
self.chunker = get_chunker(*chunker_params, seed=key.chunk_seed, sparse=sparse)
@contextmanager
def create_helper(self, path, st, status=None, hardlinkable=True):
def create_helper(self, path, st, status=None, hardlinkable=True, strip_prefix=None):
if strip_prefix is not None:
assert not path.endswith(os.sep)
if strip_prefix.startswith(path + os.sep):
# still on a directory level that shall be stripped - do not create an item for this!
yield None, "x", False, None
return
# adjust path, remove stripped directory levels
path = path.removeprefix(strip_prefix)
sanitized_path = remove_dotdot_prefixes(path)
item = Item(path=sanitized_path)
hardlinked = hardlinkable and st.st_nlink > 1
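To illustrate the new stripping logic: with the recursion root ``input/first/./`` (as used in the test further below), ``strip_prefix`` is ``input/first/``, and the walk behaves as in this minimal sketch (POSIX ``os.sep`` assumed):

import os

strip_prefix = "input/first" + os.sep  # as computed by get_strip_prefix()

# the recursion root itself is still a directory level that gets stripped:
path = "input/first"
assert strip_prefix.startswith(path + os.sep)  # -> yield None, "x" (no item created)

# one level below, the prefix is removed from the path stored in the item:
path = "input/first/secondA"
assert not strip_prefix.startswith(path + os.sep)
assert path.removeprefix(strip_prefix) == "secondA"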
@@ -1384,13 +1393,26 @@ class FilesystemObjectProcessors:
chunks = item.chunks if "chunks" in item else None
self.hlm.remember(id=(st.st_ino, st.st_dev), info=chunks)
def process_dir_with_fd(self, *, path, fd, st):
with self.create_helper(path, st, "d", hardlinkable=False) as (item, status, hardlinked, hl_chunks):
item.update(self.metadata_collector.stat_attrs(st, path, fd=fd))
def process_dir_with_fd(self, *, path, fd, st, strip_prefix):
with self.create_helper(path, st, "d", hardlinkable=False, strip_prefix=strip_prefix) as (
item,
status,
hardlinked,
hl_chunks,
):
if item is not None:
item.update(self.metadata_collector.stat_attrs(st, path, fd=fd))
return status
def process_dir(self, *, path, parent_fd, name, st):
with self.create_helper(path, st, "d", hardlinkable=False) as (item, status, hardlinked, hl_chunks):
def process_dir(self, *, path, parent_fd, name, st, strip_prefix):
with self.create_helper(path, st, "d", hardlinkable=False, strip_prefix=strip_prefix) as (
item,
status,
hardlinked,
hl_chunks,
):
if item is None:
return status
with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_dir, noatime=True, op="dir_open") as fd:
# fd is None for directories on windows, in that case a race condition check is not possible.
if fd is not None:
@@ -1399,25 +1421,46 @@ class FilesystemObjectProcessors:
item.update(self.metadata_collector.stat_attrs(st, path, fd=fd))
return status
def process_fifo(self, *, path, parent_fd, name, st):
with self.create_helper(path, st, "f") as (item, status, hardlinked, hl_chunks): # fifo
def process_fifo(self, *, path, parent_fd, name, st, strip_prefix):
with self.create_helper(path, st, "f", strip_prefix=strip_prefix) as (
item,
status,
hardlinked,
hl_chunks,
): # fifo
if item is None:
return status
with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_normal, noatime=True) as fd:
with backup_io("fstat"):
st = stat_update_check(st, os.fstat(fd))
item.update(self.metadata_collector.stat_attrs(st, path, fd=fd))
return status
def process_dev(self, *, path, parent_fd, name, st, dev_type):
with self.create_helper(path, st, dev_type) as (item, status, hardlinked, hl_chunks): # char/block device
def process_dev(self, *, path, parent_fd, name, st, dev_type, strip_prefix):
with self.create_helper(path, st, dev_type, strip_prefix=strip_prefix) as (
item,
status,
hardlinked,
hl_chunks,
): # char/block device
# looks like we can not work fd-based here without causing issues when trying to open/close the device
if item is None:
return status
with backup_io("stat"):
st = stat_update_check(st, os_stat(path=path, parent_fd=parent_fd, name=name, follow_symlinks=False))
item.rdev = st.st_rdev
item.update(self.metadata_collector.stat_attrs(st, path))
return status
def process_symlink(self, *, path, parent_fd, name, st):
with self.create_helper(path, st, "s", hardlinkable=True) as (item, status, hardlinked, hl_chunks):
def process_symlink(self, *, path, parent_fd, name, st, strip_prefix):
with self.create_helper(path, st, "s", hardlinkable=True, strip_prefix=strip_prefix) as (
item,
status,
hardlinked,
hl_chunks,
):
if item is None:
return status
fname = name if name is not None and parent_fd is not None else path
with backup_io("readlink"):
target = os.readlink(fname, dir_fd=parent_fd)
@@ -1466,8 +1509,15 @@ class FilesystemObjectProcessors:
self.add_item(item, stats=self.stats)
return status
def process_file(self, *, path, parent_fd, name, st, cache, flags=flags_normal, last_try=False):
with self.create_helper(path, st, None) as (item, status, hardlinked, hl_chunks): # no status yet
def process_file(self, *, path, parent_fd, name, st, cache, flags=flags_normal, last_try=False, strip_prefix):
with self.create_helper(path, st, None, strip_prefix=strip_prefix) as (
item,
status,
hardlinked,
hl_chunks,
): # no status yet
if item is None:
return status
with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags, noatime=True) as fd:
with backup_io("fstat"):
st = stat_update_check(st, os.fstat(fd))

View File

@@ -20,7 +20,7 @@ from ..helpers import comment_validator, ChunkerParams, PathSpec
from ..helpers import archivename_validator, FilesCacheMode
from ..helpers import eval_escapes
from ..helpers import timestamp, archive_ts_now
from ..helpers import get_cache_dir, os_stat
from ..helpers import get_cache_dir, os_stat, get_strip_prefix
from ..helpers import dir_is_tagged
from ..helpers import log_multi
from ..helpers import basic_json_data, json_print
@@ -107,6 +107,7 @@ class CreateMixIn:
pipe_bin = sys.stdin.buffer
pipe = TextIOWrapper(pipe_bin, errors="surrogateescape")
for path in iter_separated(pipe, paths_sep):
strip_prefix = get_strip_prefix(path)
path = os.path.normpath(path)
try:
with backup_io("stat"):
@@ -120,6 +121,7 @@ class CreateMixIn:
cache=cache,
read_special=args.read_special,
dry_run=dry_run,
strip_prefix=strip_prefix,
)
except BackupError as e:
self.print_warning_instance(BackupWarning(path, e))
@@ -157,6 +159,8 @@ class CreateMixIn:
if not dry_run and status is not None:
fso.stats.files_stats[status] += 1
continue
strip_prefix = get_strip_prefix(path)
path = os.path.normpath(path)
try:
with backup_io("stat"):
@@ -176,6 +180,7 @@ class CreateMixIn:
restrict_dev=restrict_dev,
read_special=args.read_special,
dry_run=dry_run,
strip_prefix=strip_prefix,
)
# if we get back here, we've finished recursing into <path>,
# we do not ever want to get back in there (even if path is given twice as recursion root)
@@ -274,7 +279,7 @@ class CreateMixIn:
else:
create_inner(None, None, None)
def _process_any(self, *, path, parent_fd, name, st, fso, cache, read_special, dry_run):
def _process_any(self, *, path, parent_fd, name, st, fso, cache, read_special, dry_run, strip_prefix):
"""
Call the right method on the given FilesystemObjectProcessor.
"""
@@ -287,13 +292,21 @@ class CreateMixIn:
try:
if stat.S_ISREG(st.st_mode):
return fso.process_file(
path=path, parent_fd=parent_fd, name=name, st=st, cache=cache, last_try=last_try
path=path,
parent_fd=parent_fd,
name=name,
st=st,
cache=cache,
last_try=last_try,
strip_prefix=strip_prefix,
)
elif stat.S_ISDIR(st.st_mode):
return fso.process_dir(path=path, parent_fd=parent_fd, name=name, st=st)
return fso.process_dir(path=path, parent_fd=parent_fd, name=name, st=st, strip_prefix=strip_prefix)
elif stat.S_ISLNK(st.st_mode):
if not read_special:
return fso.process_symlink(path=path, parent_fd=parent_fd, name=name, st=st)
return fso.process_symlink(
path=path, parent_fd=parent_fd, name=name, st=st, strip_prefix=strip_prefix
)
else:
try:
st_target = os_stat(path=path, parent_fd=parent_fd, name=name, follow_symlinks=True)
@@ -310,12 +323,17 @@ class CreateMixIn:
cache=cache,
flags=flags_special_follow,
last_try=last_try,
strip_prefix=strip_prefix,
)
else:
return fso.process_symlink(path=path, parent_fd=parent_fd, name=name, st=st)
return fso.process_symlink(
path=path, parent_fd=parent_fd, name=name, st=st, strip_prefix=strip_prefix
)
elif stat.S_ISFIFO(st.st_mode):
if not read_special:
return fso.process_fifo(path=path, parent_fd=parent_fd, name=name, st=st)
return fso.process_fifo(
path=path, parent_fd=parent_fd, name=name, st=st, strip_prefix=strip_prefix
)
else:
return fso.process_file(
path=path,
@@ -325,10 +343,13 @@ class CreateMixIn:
cache=cache,
flags=flags_special,
last_try=last_try,
strip_prefix=strip_prefix,
)
elif stat.S_ISCHR(st.st_mode):
if not read_special:
return fso.process_dev(path=path, parent_fd=parent_fd, name=name, st=st, dev_type="c")
return fso.process_dev(
path=path, parent_fd=parent_fd, name=name, st=st, dev_type="c", strip_prefix=strip_prefix
)
else:
return fso.process_file(
path=path,
@@ -338,10 +359,13 @@ class CreateMixIn:
cache=cache,
flags=flags_special,
last_try=last_try,
strip_prefix=strip_prefix,
)
elif stat.S_ISBLK(st.st_mode):
if not read_special:
return fso.process_dev(path=path, parent_fd=parent_fd, name=name, st=st, dev_type="b")
return fso.process_dev(
path=path, parent_fd=parent_fd, name=name, st=st, dev_type="b", strip_prefix=strip_prefix
)
else:
return fso.process_file(
path=path,
@@ -351,6 +375,7 @@ class CreateMixIn:
cache=cache,
flags=flags_special,
last_try=last_try,
strip_prefix=strip_prefix,
)
elif stat.S_ISSOCK(st.st_mode):
# Ignore unix sockets
@@ -401,6 +426,7 @@ class CreateMixIn:
restrict_dev,
read_special,
dry_run,
strip_prefix,
):
"""
Process *path* (or, preferably, parent_fd/name) recursively according to the various parameters.
@@ -457,6 +483,7 @@ class CreateMixIn:
cache=cache,
read_special=read_special,
dry_run=dry_run,
strip_prefix=strip_prefix,
)
else:
with OsOpen(
@@ -474,7 +501,9 @@ class CreateMixIn:
if not recurse_excluded_dir:
if keep_exclude_tags:
if not dry_run:
fso.process_dir_with_fd(path=path, fd=child_fd, st=st)
fso.process_dir_with_fd(
path=path, fd=child_fd, st=st, strip_prefix=strip_prefix
)
for tag_name in tag_names:
tag_path = os.path.join(path, tag_name)
self._rec_walk(
@@ -491,12 +520,13 @@ class CreateMixIn:
restrict_dev=restrict_dev,
read_special=read_special,
dry_run=dry_run,
strip_prefix=strip_prefix,
)
self.print_file_status("-", path) # excluded
return
if not recurse_excluded_dir:
if not dry_run:
status = fso.process_dir_with_fd(path=path, fd=child_fd, st=st)
status = fso.process_dir_with_fd(path=path, fd=child_fd, st=st, strip_prefix=strip_prefix)
else:
status = "+" # included (dir)
if recurse:
@@ -518,6 +548,7 @@ class CreateMixIn:
restrict_dev=restrict_dev,
read_special=read_special,
dry_run=dry_run,
strip_prefix=strip_prefix,
)
except BackupError as e:
@@ -541,6 +572,11 @@ class CreateMixIn:
that means if relative paths are desired, the command has to be run from the correct
directory.
The slashdot hack in paths (recursion roots) is triggered by using ``/./``:
``/this/gets/stripped/./this/gets/archived`` means to process that fs object, but
strip the prefix on the left side of ``./`` from the archived items (in this case,
``this/gets/archived`` will be the path in the archived item).
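For example, the archived item path can be derived like this (a minimal sketch using the helper added in this PR; POSIX paths assumed)::

    import os
    from borg.helpers import get_strip_prefix

    given = "/this/gets/stripped/./this/gets/archived"
    strip_prefix = get_strip_prefix(given)  # "/this/gets/stripped/"
    path = os.path.normpath(given)          # "/this/gets/stripped/this/gets/archived"
    assert path.removeprefix(strip_prefix) == "this/gets/archived"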
When giving '-' as path, borg will read data from standard input and create a
file 'stdin' in the created archive from that data. In some cases it's more
appropriate to use --content-from-command, however. See section *Reading from
@@ -680,8 +716,8 @@ class CreateMixIn:
- 'i' = backup data was read from standard input (stdin)
- '?' = missing status code (if you see this, please file a bug report!)
Reading from stdin
++++++++++++++++++
Reading backup data from stdin
++++++++++++++++++++++++++++++
There are two methods to read from stdin. Either specify ``-`` as path and
pipe directly to borg::
@@ -712,6 +748,21 @@ class CreateMixIn:
By default, the content read from stdin is stored in a file called 'stdin'.
Use ``--stdin-name`` to change the name.

Feeding all file paths from an external source
++++++++++++++++++++++++++++++++++++++++++++++

Usually, you give a starting path (recursion root) to borg, and borg then
recurses automatically, finding and backing up all fs objects contained in
there (optionally considering include/exclude rules).

If you need more control and want to give every single fs object path to borg
yourself (maybe implementing your own recursion or your own rules), you can
use ``--paths-from-stdin`` or ``--paths-from-command`` (with the latter, borg
will fail to create an archive should the command fail).

The slashdot hack for stripping path prefixes also works for paths given this
way, so be careful not to trigger it unintentionally; see the sketch after
this section.
"""
)
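To make the "Feeding all file paths" section concrete, here is a hedged sketch of driving ``--paths-from-stdin`` from Python. The archive name is illustrative, the repository is assumed to come from the environment (e.g. BORG_REPO), and newline-separated paths are assumed as the input format:

import subprocess

# feed an explicit list of paths (our own "recursion") to borg create;
# the slashdot hack applies per path, so /mnt/disk/./docs/report.pdf
# ends up in the archive as docs/report.pdf.
paths = [
    "/mnt/disk/./docs/report.pdf",
    "/mnt/disk/./docs/notes.txt",
]
subprocess.run(
    ["borg", "create", "--paths-from-stdin", "my-files"],
    input="\n".join(paths).encode(),
    check=True,
)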

View File

@@ -20,7 +20,7 @@ from .errors import BackupPermissionError, BackupIOError, BackupFileNotFoundErro
from .fs import ensure_dir, join_base_dir, get_socket_filename
from .fs import get_security_dir, get_keys_dir, get_base_dir, get_cache_dir, get_config_dir, get_runtime_dir
from .fs import dir_is_tagged, dir_is_cachedir, remove_dotdot_prefixes, make_path_safe, scandir_inorder
from .fs import secure_erase, safe_unlink, dash_open, os_open, os_stat, umount
from .fs import secure_erase, safe_unlink, dash_open, os_open, os_stat, get_strip_prefix, umount
from .fs import O_, flags_dir, flags_special_follow, flags_special, flags_base, flags_normal, flags_noatime
from .fs import HardLinkManager
from .misc import sysinfo, log_multi, consume

View File

@@ -233,6 +233,21 @@ def make_path_safe(path):
return path
def get_strip_prefix(path):
# similar to how rsync does it, we allow users to give paths like:
# /this/gets/stripped/./this/is/kept
# the whole path is what is used to read from the fs,
# the strip_prefix will be /this/gets/stripped/ and
# this/is/kept is the path being archived.
pos = path.find("/./") # detect slashdot hack
if pos > 0:
# found a prefix to strip! make sure it ends with one "/"!
return os.path.normpath(path[:pos]) + os.sep
else:
# no or empty prefix, nothing to strip!
return None
_dotdot_re = re.compile(r"^(\.\./)+")
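The helper's behavior on a few representative inputs, restated as a self-contained sketch (POSIX ``os.sep`` assumed):

import os

def get_strip_prefix(path):  # as defined above
    pos = path.find("/./")
    if pos > 0:
        return os.path.normpath(path[:pos]) + os.sep
    return None

assert get_strip_prefix("/this/gets/stripped/./this/is/kept") == "/this/gets/stripped/"
assert get_strip_prefix("/no/marker/anywhere") is None
assert get_strip_prefix("/./kept") is None  # pos == 0: empty prefix, nothing to strip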

View File

@@ -908,6 +908,32 @@ def test_create_read_special_broken_symlink(archivers, request):
assert "input/link -> somewhere does not exist" in output
def test_create_dotslash_hack(archivers, request):
archiver = request.getfixturevalue(archivers)
os.makedirs(os.path.join(archiver.input_path, "first", "secondA", "thirdA"))
os.makedirs(os.path.join(archiver.input_path, "first", "secondB", "thirdB"))
cmd(archiver, "rcreate", RK_ENCRYPTION)
cmd(archiver, "create", "test", "input/first/./") # hack!
output = cmd(archiver, "list", "test")
# dir levels left of slashdot (= input, first) not in archive:
assert "input" not in output
assert "input/first" not in output
assert "input/first/secondA" not in output
assert "input/first/secondA/thirdA" not in output
assert "input/first/secondB" not in output
assert "input/first/secondB/thirdB" not in output
assert "first" not in output
assert "first/secondA" not in output
assert "first/secondA/thirdA" not in output
assert "first/secondB" not in output
assert "first/secondB/thirdB" not in output
# dir levels right of slashdot are in archive:
assert "secondA" in output
assert "secondA/thirdA" in output
assert "secondB" in output
assert "secondB/thirdB" in output
def test_log_json(archivers, request):
archiver = request.getfixturevalue(archivers)
create_test_files(archiver.input_path)