mirror of
https://github.com/borgbase/vorta
synced 2025-01-03 05:36:19 +00:00
Show backup size with excludes applied. By @freder (#961)
This commit is contained in:
parent
9919070e49
commit
9bad152a80
2 changed files with 93 additions and 11 deletions
|
@ -30,26 +30,97 @@
|
|||
_network_status_monitor = None
|
||||
|
||||
|
||||
# copied from https://github.com/borgbackup/borg/blob/master/src/borg/shellpattern.py
|
||||
def pattern_to_regex(pat, match_end=r"\Z"):
    """Convert a shell-style pattern into a regular expression string.

    Supported syntax:

    * ``**<sep>`` matches zero or more complete directory levels (<sep> is the platform-specific path separator,
      "/" on POSIX systems).
    * ``*`` matches zero or more characters, never crossing a path separator.
    * ``?`` matches exactly one character other than the path separator.
    * ``[...]`` is a character class; a leading ``!`` negates it. Wrap a meta-character in brackets for a literal
      match (i.e. "[?]" to match the literal character "?"). A "[" without a closing "]" is taken literally.

    ``match_end`` is a regular expression appended after the translated pattern. The default anchors the match at
    the end of the string; pass an empty string to allow anything after the matched prefix.

    Returns the regular expression as a string, prefixed with the inline flags ``(?ms)``.

    This function is derived from the "fnmatch" module distributed with the Python standard library.

    Copyright (C) 2001-2016 Python Software Foundation. All rights reserved.

    TODO: support {alt1,alt2} shell-style alternatives
    """
    sep = os.path.sep
    length = len(pat)
    pieces = []
    pos = 0

    while pos < length:
        char = pat[pos]
        pos += 1

        if char == "*":
            if pat[pos:pos + 2] == "*" + sep:
                # **/ == wildcard for 0+ full (relative) directory names with trailing slashes; the forward slash
                # stands for the platform-specific path separator
                pieces.append(r"(?:[^\%s]*\%s)*" % (sep, sep))
                pos += 2
            else:
                # * == wildcard for name parts (does not cross path separator)
                pieces.append(r"[^\%s]*" % sep)
            continue

        if char == "?":
            # ? == any single character excluding path separator
            pieces.append(r"[^\%s]" % sep)
            continue

        if char != "[":
            # Anything else is matched literally.
            pieces.append(re.escape(char))
            continue

        # Character class: find the closing bracket. A "!" right after "[" (negation) and a "]" in first
        # position (a literal member of the class) cannot terminate it, so step over them before searching.
        scan = pos
        if pat[scan:scan + 1] == "!":
            scan += 1
        if pat[scan:scan + 1] == "]":
            scan += 1
        closing = pat.find("]", scan)

        if closing == -1:
            # Unterminated class: treat the "[" as a literal character.
            pieces.append("\\[")
            continue

        members = pat[pos:closing].replace("\\", "\\\\")
        pos = closing + 1
        if members[0] == "!":
            # Shell negation becomes regex negation.
            members = "^" + members[1:]
        elif members[0] == "^":
            # A leading caret is literal in shell patterns, so escape it for the regex.
            members = "\\" + members
        pieces.append("[%s]" % members)

    return "(?ms)" + "".join(pieces) + match_end
|
||||
|
||||
|
||||
class FilePathInfoAsync(QThread):
    """Thread that computes the size and file count of a path with exclude patterns applied.

    When the computation finishes, ``signal`` is emitted with three strings: the path, the size and the
    number of files, the latter two as returned by ``get_path_datasize`` and converted with ``str``.
    """

    signal = pyqtSignal(str, str, str)

    def __init__(self, path, exclude_patterns_str):
        """Store the path and prepare the exclude patterns.

        ``exclude_patterns_str`` holds one shell-style pattern per line; it may be empty or None.
        """
        self.path = path
        QThread.__init__(self)
        self.exiting = False

        # One pattern per line; surrounding whitespace is stripped and blank lines are dropped.
        stripped_lines = (raw.strip() for raw in (exclude_patterns_str or '').splitlines())
        self.exclude_patterns = [entry for entry in stripped_lines if entry != '']

        # translate exclude patterns to regular expressions
        self.exclude_patterns_re = [pattern_to_regex(entry, '') for entry in self.exclude_patterns]

    def run(self):
        """Measure ``self.path`` and emit the result through ``signal``."""
        # logger.info("running thread to get path=%s...", self.path)
        self.files_count = 0
        self.size, self.files_count = get_path_datasize(self.path, self.exclude_patterns_re)
        self.signal.emit(self.path, str(self.size), str(self.files_count))
|
||||
|
||||
|
||||
def get_directory_size(dir_path):
|
||||
def get_directory_size(dir_path, exclude_patterns_re):
|
||||
''' Get number of files only and total size in bytes from a path.
|
||||
Based off https://stackoverflow.com/a/17936789 '''
|
||||
data_size = 0
|
||||
data_size_filtered = 0
|
||||
seen = set()
|
||||
seen_filtered = set()
|
||||
|
||||
for curr_path, _, file_names in os.walk(dir_path):
|
||||
for file_name in file_names:
|
||||
|
@ -59,17 +130,25 @@ def get_directory_size(dir_path):
|
|||
if os.path.islink(file_path):
|
||||
continue
|
||||
|
||||
is_excluded = False
|
||||
for pattern in exclude_patterns_re:
|
||||
if re.match(pattern, file_path) is not None:
|
||||
is_excluded = True
|
||||
break
|
||||
|
||||
try:
|
||||
stat = os.stat(file_path)
|
||||
if stat.st_ino not in seen: # Visit each file only once
|
||||
seen.add(stat.st_ino)
|
||||
data_size += stat.st_size
|
||||
if not is_excluded:
|
||||
data_size_filtered += stat.st_size
|
||||
seen_filtered.add(stat.st_ino)
|
||||
except (FileNotFoundError, PermissionError):
|
||||
continue
|
||||
|
||||
files_count = len(seen)
|
||||
files_count_filtered = len(seen_filtered)
|
||||
|
||||
return data_size, files_count
|
||||
return data_size_filtered, files_count_filtered
|
||||
|
||||
|
||||
def get_network_status_monitor():
|
||||
|
@ -80,12 +159,15 @@ def get_network_status_monitor():
|
|||
return _network_status_monitor
|
||||
|
||||
|
||||
def get_path_datasize(path):
|
||||
def get_path_datasize(path, exclude_patterns_re):
|
||||
file_info = QFileInfo(path)
|
||||
data_size = 0
|
||||
|
||||
if file_info.isDir():
|
||||
data_size, files_count = get_directory_size(file_info.absoluteFilePath())
|
||||
data_size, files_count = get_directory_size(
|
||||
file_info.absoluteFilePath(),
|
||||
exclude_patterns_re
|
||||
)
|
||||
# logger.info("path (folder) %s %u elements size now=%u (%s)",
|
||||
# file_info.absoluteFilePath(), files_count, data_size, pretty_bytes(data_size))
|
||||
else:
|
||||
|
|
|
@ -100,7 +100,7 @@ def update_path_info(self, index_row):
|
|||
self.sourceFilesWidget.item(index_row, SourceColumn.Type).setText(self.tr("Calculating..."))
|
||||
self.sourceFilesWidget.item(index_row, SourceColumn.Size).setText(self.tr("Calculating..."))
|
||||
self.sourceFilesWidget.item(index_row, SourceColumn.FilesCount).setText(self.tr("Calculating..."))
|
||||
getDir = FilePathInfoAsync(path)
|
||||
getDir = FilePathInfoAsync(path, self.profile().exclude_patterns)
|
||||
getDir.signal.connect(self.set_path_info)
|
||||
getDir.setObjectName(path)
|
||||
self.updateThreads.append(getDir) # this is ugly, is there a better way to keep the thread object?
|
||||
|
|
Loading…
Reference in a new issue