From 421a7ef52fa52253daf630e90c45d4e978a42390 Mon Sep 17 00:00:00 2001 From: Thalian Date: Wed, 1 Jun 2022 23:18:56 +0200 Subject: [PATCH] [DOCS] #5310 - Overhaul borg help patterns fixes #5310 --- src/borg/archiver.py | 213 +++++++++++++++++++++++-------------------- 1 file changed, 113 insertions(+), 100 deletions(-) diff --git a/src/borg/archiver.py b/src/borg/archiver.py index 7e3f10909..4209b43ea 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -2485,40 +2485,35 @@ def do_break_lock(self, args, repository): helptext = collections.OrderedDict() helptext['patterns'] = textwrap.dedent(''' - The path/filenames used as input for the pattern matching start from the - currently active recursion root. You usually give the recursion root(s) - when invoking borg and these can be either relative or absolute paths. + When specifying one or more file paths in a Borg command that supports + patterns for the respective option or argument, you can apply the + patterns described here to include only desired files and/or exclude + unwanted ones. Patterns can be used - If you give `/absolute/` as root, the paths going into the matcher will - look relative like `absolute/.../file.ext`, because file paths in Borg - archives are always stored normalized and relative. This means that e.g. - ``borg create /path/to/repo ../some/path`` will store all files as - `some/path/.../file.ext` and ``borg create /path/to/repo /home/user`` - will store all files as `home/user/.../file.ext`. + - for the ``--exclude`` option, + - in the file given with the ``--exclude-from`` option, + - for the ``--pattern`` option, + - in the file given with the ``--patterns-from`` option and + - for ``PATH`` arguments that explicitly support them. - A directory exclusion pattern can end either with or without a slash ('/'). - If it ends with a slash, such as `some/path/`, the directory will be - included but not its content. 
If it does not end with a slash, such as - `some/path`, both the directory and content will be excluded. + Borg always stores all file paths normalized and relative to the + current recursion root. The recursion root is also named ``PATH`` in + Borg commands like ``borg create`` that do a file discovery, so do not + confuse the root with the ``PATH`` argument of e.g. ``borg extract``. - File patterns support these styles: fnmatch, shell, regular expressions, - path prefixes and path full-matches. By default, fnmatch is used for - ``--exclude`` patterns and shell-style is used for the ``--pattern`` - option. For commands that support patterns in their ``PATH`` argument - like (``borg list``), the default pattern is path prefix. + Starting with Borg 1.2, paths that are matched against patterns always + appear relative. If you give ``/absolute/`` as root, the paths going + into the matcher will look relative like ``absolute/.../file.ext``. + If you give ``../some/path`` as root, the paths will look like + ``some/path/.../file.ext``. - Starting with Borg 1.2, discovered fs paths are normalised, have leading - slashes removed and then are matched against your patterns. - Note: You need to review your include / exclude patterns and make - sure they do not expect leading slashes. Borg can only deal with this - for some very simple patterns by removing leading slashes there also. + File patterns support five different styles. If followed by a colon ':', + the first two characters of a pattern are used as a style selector. + Explicit style selection is necessary if a non-default style is desired + or when the desired pattern starts with two alphanumeric characters + followed by a colon (e.g. ``aa:something/*``). - If followed by a colon (':') the first two characters of a pattern are - used as a style selector. 
Explicit style selection is necessary when a - non-default style is desired or when the desired pattern starts with - two alphanumeric characters followed by a colon (i.e. `aa:something/*`). - - `Fnmatch <https://docs.python.org/3/library/fnmatch.html>`_, selector `fm:` + `Fnmatch <https://docs.python.org/3/library/fnmatch.html>`_, selector ``fm:`` This is the default style for ``--exclude`` and ``--exclude-from``. These patterns use a variant of shell pattern syntax, with '\\*' matching any number of characters, '?' matching any single character, '[...]' @@ -2526,7 +2521,7 @@ def do_break_lock(self, args, repository): matching any character not specified. For the purpose of these patterns, the path separator (backslash for Windows and '/' on other systems) is not treated specially. Wrap meta-characters in brackets for a literal - match (i.e. `[?]` to match the literal character `?`). For a path + match (e.g. ``[?]`` to match the literal character '?'). For a path to match a pattern, the full path must match, or it must match from the start of the full path to just before a path separator. Except for the root path, paths will never end in the path separator when @@ -2534,33 +2529,31 @@ def do_break_lock(self, args, repository): separator, a '\\*' is appended before matching is attempted. A leading path separator is always removed. - Shell-style patterns, selector `sh:` + Shell-style patterns, selector ``sh:`` This is the default style for ``--pattern`` and ``--patterns-from``. Like fnmatch patterns these are similar to shell patterns. The difference - is that the pattern may include `**/` for matching zero or more directory - levels, `*` for matching zero or more arbitrary characters with the + is that the pattern may include ``**/`` for matching zero or more directory + levels, ``*`` for matching zero or more arbitrary characters with the exception of any path separator. A leading path separator is always removed. - Regular expressions, selector `re:` - Regular expressions similar to those found in Perl are supported. 
Unlike - shell patterns regular expressions are not required to match the full + `Regular expressions <https://docs.python.org/3/library/re.html>`_, selector ``re:`` + Unlike shell patterns, regular expressions are not required to match the full path and any substring match is sufficient. It is strongly recommended to anchor patterns to the start ('^'), to the end ('$') or both. Path separators (backslash for Windows and '/' on other systems) in paths are - always normalized to a forward slash ('/') before applying a pattern. The - regular expression syntax is described in the `Python documentation for - the re module <https://docs.python.org/3/library/re.html>`_. + always normalized to a forward slash '/' before applying a pattern. - Path prefix, selector `pp:` + Path prefix, selector ``pp:`` This pattern style is useful to match whole sub-directories. The pattern - `pp:root/somedir` matches `root/somedir` and everything therein. A leading - path separator is always removed. + ``pp:root/somedir`` matches ``root/somedir`` and everything therein. + A leading path separator is always removed. - Path full-match, selector `pf:` + Path full-match, selector ``pf:`` This pattern style is (only) useful to match full paths. This is kind of a pseudo pattern as it can not have any variable or - unspecified parts - the full path must be given. `pf:root/file.ext` matches - `root/file.ext` only. A leading path separator is always removed. + unspecified parts - the full path must be given. ``pf:root/file.ext`` + matches ``root/file.ext`` only. A leading path separator is always + removed. Implementation note: this is implemented via very time-efficient O(1) hashtable lookups (this means you can have huge amounts of such patterns @@ -2573,20 +2566,20 @@ def do_break_lock(self, args, repository): .. note:: - `re:`, `sh:` and `fm:` patterns are all implemented on top of the Python SRE - engine. It is very easy to formulate patterns for each of these types which - requires an inordinate amount of time to match paths. 
If untrusted users - are able to supply patterns, ensure they cannot supply `re:` patterns. - Further, ensure that `sh:` and `fm:` patterns only contain a handful of - wildcards at most. + ``re:``, ``sh:`` and ``fm:`` patterns are all implemented on top of + the Python SRE engine. It is very easy to formulate patterns for each + of these types which require an inordinate amount of time to match + paths. If untrusted users are able to supply patterns, ensure they + cannot supply ``re:`` patterns. Further, ensure that ``sh:`` and + ``fm:`` patterns only contain a handful of wildcards at most. Exclusions can be passed via the command line option ``--exclude``. When used from within a shell, the patterns should be quoted to protect them from expansion. The ``--exclude-from`` option permits loading exclusion patterns from a text - file with one pattern per line. Lines empty or starting with the number sign - ('#') after removing whitespace on both ends are ignored. The optional style + file with one pattern per line. Lines empty or starting with the hash sign + '#' after removing whitespace on both ends are ignored. The optional style selector prefix is also supported for patterns loaded from a file. Due to whitespace removal, paths with whitespace at the beginning or end can only be excluded using regular expressions. 
@@ -2597,21 +2590,21 @@ def do_break_lock(self, args, repository): Examples:: # Exclude '/home/user/file.o' but not '/home/user/file.odt': - $ borg create -e '*.o' backup / + $ borg create -e '*.o' /path/to/repo::archive / # Exclude '/home/user/junk' and '/home/user/subdir/junk' but # not '/home/user/importantjunk' or '/etc/junk': - $ borg create -e 'home/*/junk' backup / + $ borg create -e 'home/*/junk' /path/to/repo::archive / # Exclude the contents of '/home/user/cache' but not the directory itself: - $ borg create -e home/user/cache/ backup / + $ borg create -e home/user/cache/ /path/to/repo::archive / # The file '/home/user/cache/important' is *not* backed up: - $ borg create -e home/user/cache/ backup / /home/user/cache/important + $ borg create -e home/user/cache/ /path/to/repo::archive / /home/user/cache/important # The contents of directories in '/home' are not backed up when their name # ends in '.tmp' - $ borg create --exclude 're:^home/[^/]+\\.tmp/' backup / + $ borg create --exclude 're:^home/[^/]+\\.tmp/' /path/to/repo::archive / # Load exclusions from file $ cat >exclude.txt <