mirror of https://github.com/borgbase/vorta
Use fnmatch for exclude patterns. By @real-yfprojects (#1253)
This commit is contained in:
parent
5e94679507
commit
1b2d39e8f7
|
@ -184,7 +184,7 @@
|
|||
<string/>
|
||||
</property>
|
||||
<property name="placeholderText">
|
||||
<string>E.g. **/.cache</string>
|
||||
<string>E.g. */.cache</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
import argparse
|
||||
import errno
|
||||
import fnmatch
|
||||
import getpass
|
||||
import os
|
||||
import platform
|
||||
|
@ -31,64 +32,6 @@ borg_compat = BorgCompatibility()
|
|||
_network_status_monitor = None
|
||||
|
||||
|
||||
# copied from https://github.com/borgbackup/borg/blob/master/src/borg/shellpattern.py
|
||||
def pattern_to_regex(pat, match_end=r"\Z"):
    """Convert a shell-style pattern into a regular expression string.

    Supported syntax:

    * ``**<sep>`` matches zero or more complete directory levels, where
      <sep> is the platform-specific path separator ("/" on POSIX systems).
    * ``*`` matches zero or more characters, never a path separator.
    * ``?`` matches exactly one character, never a path separator.
    * ``[...]`` is a character class; ``[!...]`` negates it. Put a
      meta-character in brackets to match it literally (e.g. "[?]").

    ``match_end`` is a regular expression appended after the translated
    pattern; by default it anchors the match at the end of the string.

    This function is derived from the "fnmatch" module distributed with the
    Python standard library.

    Copyright (C) 2001-2016 Python Software Foundation. All rights reserved.

    TODO: support {alt1,alt2} shell-style alternatives
    """
    sep = os.path.sep
    length = len(pat)
    pieces = []
    pos = 0

    while pos < length:
        char = pat[pos]
        pos += 1

        if char == "*":
            if pat[pos:pos + 2] == "*" + sep:
                # "**<sep>": any number of whole directory names, each
                # followed by the separator.
                pieces.append(r"(?:[^\%s]*\%s)*" % (sep, sep))
                pos += 2
            else:
                # A single "*" stays within one path component.
                pieces.append(r"[^\%s]*" % sep)
        elif char == "?":
            # Exactly one character that is not the separator.
            pieces.append(r"[^\%s]" % sep)
        elif char == "[":
            # Find the closing bracket. A leading "!" and a "]" placed
            # directly after the opening belong to the class itself.
            scan_from = pos
            if scan_from < length and pat[scan_from] == "!":
                scan_from += 1
            if scan_from < length and pat[scan_from] == "]":
                scan_from += 1
            closing = pat.find("]", scan_from)

            if closing == -1:
                # No closing bracket: treat "[" as a literal character.
                pieces.append("\\[")
            else:
                members = pat[pos:closing].replace("\\", "\\\\")
                pos = closing + 1
                if members[0] == "!":
                    members = "^" + members[1:]
                elif members[0] == "^":
                    members = "\\" + members
                pieces.append("[%s]" % members)
        else:
            pieces.append(re.escape(char))

    return "(?ms)" + "".join(pieces) + match_end
|
||||
|
||||
|
||||
class FilePathInfoAsync(QThread):
|
||||
signal = pyqtSignal(str, str, str)
|
||||
|
||||
|
@ -101,45 +44,83 @@ class FilePathInfoAsync(QThread):
|
|||
line = _line.strip()
|
||||
if line != '':
|
||||
self.exclude_patterns.append(line)
|
||||
# translate exclude patterns to regular expressions
|
||||
self.exclude_patterns_re = [
|
||||
pattern_to_regex(pattern, '')
|
||||
for pattern in self.exclude_patterns
|
||||
]
|
||||
|
||||
def run(self):
|
||||
# logger.info("running thread to get path=%s...", self.path)
|
||||
self.size, self.files_count = get_path_datasize(
|
||||
self.path,
|
||||
self.exclude_patterns_re
|
||||
self.exclude_patterns
|
||||
)
|
||||
self.signal.emit(self.path, str(self.size), str(self.files_count))
|
||||
|
||||
|
||||
def get_directory_size(dir_path, exclude_patterns_re):
|
||||
def normalize_path(path: str) -> str:
    """Normalize a path for macOS; return it unchanged on other platforms.

    HFS+ converts paths to a canonical form, so users shouldn't be required
    to enter an exact match. Windows and Unix filesystems allow different
    forms, so users always have to enter an exact match there.

    :param path: path or pattern text to normalize.
    :return: the NFD-normalized text on macOS, otherwise ``path`` itself.
    """
    return unicodedata.normalize('NFD', path) if sys.platform == 'darwin' else path


# Prepare patterns as borg does, see `FnmatchPattern._prepare` at
# https://github.com/borgbackup/borg/blob/master/src/borg/patterns.py
def prepare_pattern(pattern: str) -> re.Pattern:
    """Compile an fnmatch-style exclude pattern the way borg does.

    :param pattern: exclude pattern as entered by the user.
    :return: compiled regular expression, meant to be used with `match`.
    """
    # Normalize the pattern; `match` normalizes the path the same way, so
    # both sides agree on the Unicode form on macOS.
    pattern = normalize_path(pattern)

    if pattern.endswith(os.path.sep):
        # A trailing separator means that the contents should be excluded,
        # but not the directory itself.
        pattern = os.path.normpath(pattern).rstrip(os.path.sep)
        pattern += os.path.sep + '*' + os.path.sep
    else:
        pattern = os.path.normpath(pattern) + os.path.sep + '*'

    # A separator at the beginning is removed; `match` strips it from the
    # path as well.
    pattern = pattern.lstrip(os.path.sep)
    return re.compile(fnmatch.translate(pattern))


def match(pattern: re.Pattern, path: str) -> bool:
    """Check whether a path matches a pattern built by `prepare_pattern`.

    :param pattern: compiled pattern returned by `prepare_pattern`.
    :param path: file or directory path to test.
    :return: True if the path is covered by the pattern.
    """
    # Mirror the pattern preparation: same normalization, no leading
    # separator, and a trailing separator so that the '<sep>*' suffix of the
    # pattern also matches the path itself.
    path = normalize_path(path).lstrip(os.path.sep) + os.path.sep
    return pattern.match(path) is not None
|
||||
|
||||
|
||||
def get_directory_size(dir_path, exclude_patterns):
|
||||
''' Get number of files only and total size in bytes from a path.
|
||||
Based off https://stackoverflow.com/a/17936789 '''
|
||||
exclude_patterns = [prepare_pattern(p) for p in exclude_patterns]
|
||||
|
||||
data_size_filtered = 0
|
||||
seen = set()
|
||||
seen_filtered = set()
|
||||
|
||||
for curr_path, _, file_names in os.walk(dir_path):
|
||||
for dir_path, subdirectories, file_names in os.walk(dir_path, topdown=True):
|
||||
is_excluded = False
|
||||
for pattern in exclude_patterns:
|
||||
if match(pattern, dir_path):
|
||||
is_excluded = True
|
||||
break
|
||||
|
||||
if is_excluded:
|
||||
subdirectories.clear() # so that os.walk won't walk them
|
||||
continue
|
||||
|
||||
for file_name in file_names:
|
||||
file_path = os.path.join(curr_path, file_name)
|
||||
file_path = os.path.join(dir_path, file_name)
|
||||
|
||||
# Ignore symbolic links, since borg doesn't follow them
|
||||
if os.path.islink(file_path):
|
||||
continue
|
||||
|
||||
is_excluded = False
|
||||
for pattern in exclude_patterns_re:
|
||||
if re.match(pattern, file_path) is not None:
|
||||
for pattern in exclude_patterns:
|
||||
if match(pattern, file_path):
|
||||
is_excluded = True
|
||||
break
|
||||
|
||||
try:
|
||||
stat = os.stat(file_path)
|
||||
if stat.st_ino not in seen: # Visit each file only once
|
||||
# this won't add the size of a hardlinked file
|
||||
seen.add(stat.st_ino)
|
||||
if not is_excluded:
|
||||
data_size_filtered += stat.st_size
|
||||
|
@ -160,14 +141,14 @@ def get_network_status_monitor():
|
|||
return _network_status_monitor
|
||||
|
||||
|
||||
def get_path_datasize(path, exclude_patterns_re):
|
||||
def get_path_datasize(path, exclude_patterns):
|
||||
file_info = QFileInfo(path)
|
||||
data_size = 0
|
||||
|
||||
if file_info.isDir():
|
||||
data_size, files_count = get_directory_size(
|
||||
file_info.absoluteFilePath(),
|
||||
exclude_patterns_re
|
||||
exclude_patterns
|
||||
)
|
||||
# logger.info("path (folder) %s %u elements size now=%u (%s)",
|
||||
# file_info.absoluteFilePath(), files_count, data_size, pretty_bytes(data_size))
|
||||
|
|
Loading…
Reference in New Issue