mirror of https://github.com/borgbackup/borg.git
413 lines
16 KiB
Python
413 lines
16 KiB
Python
import errno
|
|
import hashlib
|
|
import os
|
|
import os.path
|
|
import re
|
|
import stat
|
|
import subprocess
|
|
import sys
|
|
import textwrap
|
|
|
|
from .errors import Error
|
|
|
|
from .process import prepare_subprocess_env
|
|
from ..platformflags import is_win32
|
|
|
|
from ..constants import * # NOQA
|
|
|
|
from ..logger import create_logger
|
|
# module-level logger, named after this module (borg convention)
logger = create_logger()


# True when running on Python >= 3.7, where os.scandir() also accepts a
# directory file descriptor instead of a path (used by scandir_inorder()).
py_37_plus = sys.version_info >= (3, 7)
|
|
|
|
|
|
def ensure_dir(path, mode=stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO, pretty_deadly=True):
    """
    Ensures that the dir exists with the right permissions.

    1) Make sure the directory exists in a race-free operation
    2) If mode is not None and the directory has been created, give the right
       permissions to the leaf directory. The current umask value is masked out first.
    3) If pretty_deadly is True, catch exceptions, reraise them with a pretty
       message.

    Returns if the directory has been created and has the right permissions,
    An exception otherwise. If a deadly exception happened it is reraised.

    :param path: directory path to create (parents are created as needed)
    :param mode: permission bits for the leaf directory (masked by umask)
    :param pretty_deadly: wrap OSError in borg's Error for a nicer user message
    :raises Error: if pretty_deadly is True and the directory could not be created
    :raises OSError: if pretty_deadly is False and the directory could not be created
    """
    try:
        # exist_ok=True makes this race-free: a concurrent creator does not cause a failure.
        os.makedirs(path, mode=mode, exist_ok=True)
    except OSError as e:
        if pretty_deadly:
            # Wrap in borg's Error for a pretty user-facing message, but chain the
            # original OSError explicitly (`from e`) so tracebacks stay informative.
            raise Error(str(e)) from e
        else:
            raise
|
|
|
|
|
|
def get_base_dir():
    """Get home directory / base directory for borg:

    - BORG_BASE_DIR, if set
    - HOME, if set
    - ~$USER, if USER is set
    - ~
    """
    env = os.environ
    base_dir = env.get('BORG_BASE_DIR') or env.get('HOME')
    if base_dir:
        return base_dir
    # os.path.expanduser() behaves differently for '~' and '~someuser': with an
    # explicit username, a set HOME environment variable is no longer respected.
    # Thus we only fall back to tilde expansion when HOME is unset.
    user = env.get('USER', '')
    return os.path.expanduser('~' + user)
|
|
|
|
|
|
def get_keys_dir():
    """Determine where to store the repository keys and make sure the directory exists.

    Uses BORG_KEYS_DIR if set, otherwise <config_dir>/keys.
    """
    keys_dir = os.environ.get('BORG_KEYS_DIR')
    if keys_dir is None:
        # note: do not just give this as default to the environment.get(), see issue #5979.
        keys_dir = os.path.join(get_config_dir(), 'keys')
    ensure_dir(keys_dir)
    return keys_dir
|
|
|
|
|
|
def get_security_dir(repository_id=None):
    """Determine where to store local security information.

    If *repository_id* is given, return the per-repository subdirectory.
    The directory is created if it does not exist yet.
    """
    base = os.environ.get('BORG_SECURITY_DIR')
    if base is None:
        # note: do not just give this as default to the environment.get(), see issue #5979.
        base = os.path.join(get_config_dir(), 'security')
    security_dir = os.path.join(base, repository_id) if repository_id else base
    ensure_dir(security_dir)
    return security_dir
|
|
|
|
|
|
def get_cache_dir():
    """Determine where to store the local cache and make sure the directory exists.

    Resolution order: BORG_CACHE_DIR, then XDG_CACHE_HOME/borg (only when
    BORG_BASE_DIR is not explicitly set), then <base_dir>/.cache/borg.
    A CACHEDIR.TAG file is written into the directory so other backup tools
    (and borg itself) can recognize and exclude it.
    """
    # Get cache home path
    cache_home = os.path.join(get_base_dir(), '.cache')
    # Try to use XDG_CACHE_HOME instead if BORG_BASE_DIR isn't explicitly set
    if not os.environ.get('BORG_BASE_DIR'):
        cache_home = os.environ.get('XDG_CACHE_HOME', cache_home)
    # Use BORG_CACHE_DIR if set, otherwise assemble final path from cache home path
    cache_dir = os.environ.get('BORG_CACHE_DIR', os.path.join(cache_home, 'borg'))
    # Create path if it doesn't exist yet
    ensure_dir(cache_dir)
    # Write a CACHEDIR.TAG marker (see http://www.bford.info/cachedir/spec.html).
    cache_tag_fn = os.path.join(cache_dir, CACHE_TAG_NAME)
    if not os.path.exists(cache_tag_fn):
        cache_tag_contents = CACHE_TAG_CONTENTS + textwrap.dedent("""
            # This file is a cache directory tag created by Borg.
            # For information about cache directory tags, see:
            # http://www.bford.info/cachedir/spec.html
            """).encode('ascii')
        from ..platform import SaveFile
        # SaveFile writes atomically, so a partially written tag file can not appear.
        with SaveFile(cache_tag_fn, binary=True) as fd:
            fd.write(cache_tag_contents)
    return cache_dir
|
|
|
|
|
|
def get_config_dir():
    """Determine where to store whole config"""
    # Start from <base_dir>/.config as the default config home.
    home = os.path.join(get_base_dir(), '.config')
    # Honour XDG_CONFIG_HOME only when BORG_BASE_DIR was not explicitly given.
    if not os.environ.get('BORG_BASE_DIR'):
        home = os.environ.get('XDG_CONFIG_HOME', home)
    # BORG_CONFIG_DIR overrides everything else.
    config_dir = os.environ.get('BORG_CONFIG_DIR', os.path.join(home, 'borg'))
    # Make sure the directory exists before handing it out.
    ensure_dir(config_dir)
    return config_dir
|
|
|
|
|
|
def dir_is_cachedir(path):
    """Determines whether the specified path is a cache directory (and
    therefore should potentially be excluded from the backup) according to
    the CACHEDIR.TAG protocol
    (http://www.bford.info/cachedir/spec.html).
    """
    tag_path = os.path.join(path, CACHE_TAG_NAME)
    try:
        if not os.path.exists(tag_path):
            return False
        with open(tag_path, 'rb') as tag_file:
            # only the signature prefix needs to match
            return tag_file.read(len(CACHE_TAG_CONTENTS)) == CACHE_TAG_CONTENTS
    except OSError:
        # unreadable tag file -> treat as "not a cache dir"
        return False
|
|
|
|
|
|
def dir_is_tagged(path, exclude_caches, exclude_if_present):
    """Determines whether the specified path is excluded by being a cache
    directory or containing user-specified tag files/directories. Returns a
    list of the names of the tag files/directories (either CACHEDIR.TAG or the
    matching user-specified files/directories).
    """
    # TODO: do operations based on the directory fd
    tags = []
    if exclude_caches and dir_is_cachedir(path):
        tags.append(CACHE_TAG_NAME)
    for tag in (exclude_if_present or []):
        if os.path.exists(os.path.join(path, tag)):
            tags.append(tag)
    return tags
|
|
|
|
|
|
# matches any leading run of '/' and '../' segments, e.g. '/', '//', '../../'
_safe_re = re.compile(r'^((\.\.)?/+)+')


def make_path_safe(path):
    """Make path safe by making it relative and local.

    Strips leading slashes and leading '../' sequences; an empty result
    becomes '.'.  NOTE(review): embedded '..' segments (e.g. 'a/../b') are
    left untouched by this function.
    """
    stripped = _safe_re.sub('', path)
    return stripped if stripped else '.'
|
|
|
|
|
|
class HardLinkManager:
    """
    Manage hardlinks (and avoid code duplication doing so).

    A) When creating a borg2 archive from the filesystem, we have to maintain a mapping like:
       (dev, ino) -> (hlid, chunks)  # for fs_hl_targets
       If we encounter the same (dev, ino) again later, we'll just re-use the hlid and chunks list.

    B) When extracting a borg2 archive to the filesystem, we have to maintain a mapping like:
       hlid -> path
       If we encounter the same hlid again later, we hardlink to the path of the already extracted content of same hlid.

    C) When transferring from a borg1 archive, we need:
       path -> chunks, chunks_healthy  # for borg1_hl_targets
       If we encounter a regular file item with source == path later, we reuse chunks and chunks_healthy
       and create the same hlid = hardlink_id_from_path(source).

    D) When importing a tar file (simplified 1-pass way for now, not creating borg hardlink items):
       path -> chunks
       If we encounter a LNK tar entry later with linkname==path, we re-use the chunks and create a regular file item.
       For better hardlink support (including the very first hardlink item for each group of same-target hardlinks),
       we would need a 2-pass processing, which is not yet implemented.
    """
    def __init__(self, *, id_type, info_type):
        # id_type / info_type are the expected types for remember()/retrieve()
        # keys and values; they are enforced there via assertions.
        self._map = {}
        self.id_type = id_type
        self.info_type = info_type

    def borg1_hardlinkable(self, mode):  # legacy
        """Return True if an item with this st_mode could be a borg1 hardlink
        (regular file, block/char device or fifo)."""
        return stat.S_ISREG(mode) or stat.S_ISBLK(mode) or stat.S_ISCHR(mode) or stat.S_ISFIFO(mode)

    def borg1_hardlink_master(self, item):  # legacy
        """Return True if *item* is a borg1 hardlink master (no 'source' set)."""
        return item.get('hardlink_master', True) and 'source' not in item and self.borg1_hardlinkable(item.mode)

    def borg1_hardlink_slave(self, item):  # legacy
        """Return True if *item* is a borg1 hardlink slave (references a 'source')."""
        return 'source' in item and self.borg1_hardlinkable(item.mode)

    def hardlink_id_from_path(self, path):
        """compute a hardlink id from a path"""
        assert isinstance(path, str)
        return hashlib.sha256(path.encode('utf-8', errors='surrogateescape')).digest()

    def hardlink_id_from_inode(self, *, ino, dev):
        """compute a hardlink id from an inode"""
        assert isinstance(ino, int)
        assert isinstance(dev, int)
        return hashlib.sha256(f'{ino}/{dev}'.encode()).digest()

    def remember(self, *, id, info):
        """
        remember stuff from a (usually contentful) item.

        :param id: some id used to reference to the contentful item, could be:
                   a path (tar style, old borg style) [bytes]
                   a hlid (new borg style) [bytes]
                   a (dev, inode) tuple (filesystem)
        :param info: information to remember, could be:
                     chunks / chunks_healthy list
                     hlid
        """
        # bugfix: the original assertion message referenced the undefined names
        # `key` / `self.key_type`, so a failing type check raised NameError
        # instead of a useful AssertionError.
        assert isinstance(id, self.id_type), f"id is {id!r}, not of type {self.id_type}"
        assert isinstance(info, self.info_type), f"info is {info!r}, not of type {self.info_type}"
        self._map[id] = info

    def retrieve(self, id, *, default=None):
        """
        retrieve stuff to use it in a (usually contentless) item.

        :param id: same kind of id as given to remember()
        :param default: returned if nothing was remembered for *id*
        """
        assert isinstance(id, self.id_type)
        return self._map.get(id, default)
|
|
|
|
|
|
def scandir_keyfunc(dirent):
    """Sort key for os.DirEntry objects: primarily by inode number.

    Entries whose inode() call fails sort after all stat-able entries
    (group 1 vs group 0), ordered lexically by name. The failure is only
    logged, not raised: we may not break the whole scandir just because of
    one bad dirent - another stat will be done later anyway (or the entry
    will be skipped by an exclude pattern).
    """
    try:
        inode = dirent.inode()
    except OSError as e:
        # maybe a permission denied error while doing a stat() on the dirent
        logger.debug('scandir_inorder: Unable to stat %s: %s', dirent.path, e)
        return (1, dirent.name)
    return (0, inode)
|
|
|
|
|
|
def scandir_inorder(*, path, fd=None):
    """Return the directory entries sorted by inode number (see scandir_keyfunc).

    On Python 3.7+ an open directory fd may be passed instead of a path
    (DirEntry objects then have no full entry.path).
    """
    target = path
    if fd is not None and py_37_plus:
        target = fd
    return sorted(os.scandir(target), key=scandir_keyfunc)
|
|
|
|
|
|
def secure_erase(path, *, avoid_collateral_damage):
    """Attempt to securely erase a file by writing random data over it before deleting it.

    If avoid_collateral_damage is True, we only secure erase if the total link count is 1,
    otherwise we just do a normal "delete" (unlink) without first overwriting it with random.
    This avoids other hardlinks pointing to same inode as <path> getting damaged, but might be less secure.
    A typical scenario where this is useful are quick "hardlink copies" of bigger directories.

    If avoid_collateral_damage is False, we always secure erase.
    If there are hardlinks pointing to the same inode as <path>, they will contain random garbage afterwards.
    """
    with open(path, 'r+b') as fd:
        st = os.stat(fd.fileno())
        # skip the overwrite only when we must spare other hardlinks to this inode
        spare_hardlinks = avoid_collateral_damage and st.st_nlink > 1
        if not spare_hardlinks:
            fd.write(os.urandom(st.st_size))
            fd.flush()
            # make sure the random data really hit the disk before unlinking
            os.fsync(fd.fileno())
    os.unlink(path)
|
|
|
|
|
|
def safe_unlink(path):
    """
    Safely unlink (delete) *path*.

    If we run out of space while deleting the file, we try truncating it first.
    BUT we truncate only if path is the only hardlink referring to this content.

    Use this when deleting potentially large files when recovering
    from a VFS error such as ENOSPC. It can help a full file system
    recover. Refer to the "File system interaction" section
    in repository.py for further explanations.
    """
    try:
        os.unlink(path)
    except OSError as unlink_err:
        if unlink_err.errno != errno.ENOSPC:
            # not a free-space problem, nothing we can do about it here.
            raise
        # We ran out of space while deleting. Truncating first may free space,
        # but only do that when no other hardlink shares this inode - otherwise
        # we would destroy the other hardlink's content.
        if os.stat(path).st_nlink > 1:
            raise
        try:
            # 'r+b' (O_RDWR) opens without creating, so a vanished *path* still raises.
            with open(path, 'r+b') as fd:
                fd.truncate()
        except OSError:
            # truncation failed, so the original unlink problem stands - re-raise it.
            raise unlink_err
        # truncation succeeded and freed space, deleting should work now:
        os.unlink(path)
|
|
|
|
|
|
def dash_open(path, mode):
    """Open *path*, treating '-' as stdin (read modes) or stdout (write modes).

    For '-' with a binary mode, the underlying .buffer of the stream is
    returned. Read/write combined modes ('+') are not supported.
    """
    assert '+' not in mode  # the streams are either r or w, but never both
    if path != '-':
        return open(path, mode)
    stream = sys.stdin if 'r' in mode else sys.stdout
    return stream.buffer if 'b' in mode else stream
|
|
|
|
|
|
def O_(*flags):
    """OR together the os.O_<name> values for the given flag names.

    Flag names that do not exist on the current platform contribute 0, so
    the result degrades gracefully across operating systems.
    """
    value = 0
    for name in flags:
        value |= getattr(os, 'O_' + name, 0)
    return value
|
|
|
|
|
|
# Open-flag combinations used when reading filesystem items during a backup.
# O_() yields 0 for flags that do not exist on this platform, so all of these
# degrade gracefully across operating systems.
flags_base = O_('BINARY', 'NOCTTY', 'RDONLY')
flags_special = flags_base | O_('NOFOLLOW')  # BLOCK == wait when reading devices or fifos
flags_special_follow = flags_base  # BLOCK == wait when reading symlinked devices or fifos
flags_normal = flags_base | O_('NONBLOCK', 'NOFOLLOW')
flags_noatime = flags_normal | O_('NOATIME')
flags_root = O_('RDONLY')
flags_dir = O_('DIRECTORY', 'RDONLY', 'NOFOLLOW')
|
|
|
|
|
|
def os_open(*, flags, path=None, parent_fd=None, name=None, noatime=False):
    """
    Use os.open to open a fs item.

    If parent_fd and name are given, they are preferred and openat will be used,
    path is not used in this case.

    :param path: full (but not necessarily absolute) path
    :param parent_fd: open directory file descriptor
    :param name: name relative to parent_fd
    :param flags: open flags for os.open() (int)
    :param noatime: True if access time shall be preserved
    :return: file descriptor (or None on win32 for directories, which can not be opened there)
    """
    if name and parent_fd is not None:
        # name is neither None nor empty, parent_fd given.
        fname = name  # use name relative to parent_fd
    else:
        fname, parent_fd = path, None  # just use the path
    if is_win32 and os.path.isdir(fname):
        # Directories can not be opened on Windows.
        return None
    _flags_normal = flags
    if noatime:
        # O_('NOATIME') is 0 on platforms lacking O_NOATIME, so both flag sets
        # may end up equal - that case is detected below to avoid useless retries.
        _flags_noatime = _flags_normal | O_('NOATIME')
        try:
            # if we have O_NOATIME, this likely will succeed if we are root or owner of file:
            fd = os.open(fname, _flags_noatime, dir_fd=parent_fd)
        except PermissionError:
            if _flags_noatime == _flags_normal:
                # we do not have O_NOATIME, no need to try again:
                raise
            # Was this EPERM due to the O_NOATIME flag? Try again without it:
            fd = os.open(fname, _flags_normal, dir_fd=parent_fd)
        except OSError as exc:
            # O_NOATIME causes EROFS when accessing a volume shadow copy in WSL1
            from . import workarounds
            if 'retry_erofs' in workarounds and exc.errno == errno.EROFS and _flags_noatime != _flags_normal:
                # retry without O_NOATIME, only when the workaround is enabled
                fd = os.open(fname, _flags_normal, dir_fd=parent_fd)
            else:
                raise
    else:
        fd = os.open(fname, _flags_normal, dir_fd=parent_fd)
    return fd
|
|
|
|
|
|
def os_stat(*, path=None, parent_fd=None, name=None, follow_symlinks=False):
    """
    Use os.stat to stat a fs item.

    If parent_fd and name are given, they are preferred and a stat relative
    to the directory fd is done (statat); *path* is not used in that case.

    :param path: full (but not necessarily absolute) path
    :param parent_fd: open directory file descriptor
    :param name: name relative to parent_fd
    :param follow_symlinks: whether to stat the target of a symlink instead of the link
    :return: stat info
    """
    # prefer the (parent_fd, name) pair when name is a non-empty string
    if name and parent_fd is not None:
        return os.stat(name, dir_fd=parent_fd, follow_symlinks=follow_symlinks)
    return os.stat(path, dir_fd=None, follow_symlinks=follow_symlinks)
|
|
|
|
|
|
def umount(mountpoint):
    """Unmount the FUSE filesystem at *mountpoint*, returning the tool's exit code.

    Tries `fusermount -u` first; if that binary is not installed (e.g. on
    platforms without fusermount), falls back to plain `umount`.
    """
    env = prepare_subprocess_env(system=True)
    try:
        return subprocess.call(['fusermount', '-u', mountpoint], env=env)
    except FileNotFoundError:
        return subprocess.call(['umount', mountpoint], env=env)
|