Show backup size with excludes applied. By @freder (#961)

This commit is contained in:
Frederic Brodbeck 2021-10-18 13:41:37 +02:00 committed by GitHub
parent 9919070e49
commit 9bad152a80
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 93 additions and 11 deletions

View File

@ -30,26 +30,97 @@ borg_compat = BorgCompatibility()
_network_status_monitor = None
# copied from https://github.com/borgbackup/borg/blob/master/src/borg/shellpattern.py
def pattern_to_regex(pat, match_end=r"\Z"):
    """Translate a shell-style pattern to a regular expression.

    The pattern may include ``**<sep>`` (<sep> stands for the platform-specific path separator; "/" on POSIX systems)
    for matching zero or more directory levels and "*" for matching zero or more arbitrary characters with the
    exception of any path separator. "?" matches exactly one character that is not a path separator. Wrap
    meta-characters in brackets for a literal match (i.e. "[?]" to match the literal character "?").

    Using match_end=regex one can give a regular expression that is used to match after the regex that is generated
    from the pattern. The default is to match the end of the string.

    This function is derived from the "fnmatch" module distributed with the Python standard library.
    Copyright (C) 2001-2016 Python Software Foundation. All rights reserved.

    TODO: support {alt1,alt2} shell-style alternatives

    :param pat: shell-style pattern to translate
    :param match_end: regular expression source appended after the translated pattern
    :return: regular expression source string, prefixed with the inline flags ``(?ms)``
    """
    sep = os.path.sep
    n = len(pat)
    i = 0
    # Collect the translated fragments and join once at the end instead of
    # growing a string with repeated "+=".
    parts = []

    while i < n:
        c = pat[i]
        i += 1

        if c == "*":
            if i + 1 < n and pat[i] == "*" and pat[i + 1] == sep:
                # **/ == wildcard for 0+ full (relative) directory names with trailing slashes; the forward slash
                # stands for the platform-specific path separator
                parts.append(r"(?:[^\%s]*\%s)*" % (sep, sep))
                # skip the second "*" and the separator that were just consumed
                i += 2
            else:
                # * == wildcard for name parts (does not cross path separator)
                parts.append(r"[^\%s]*" % sep)
        elif c == "?":
            # ? == any single character excluding path separator
            parts.append(r"[^\%s]" % sep)
        elif c == "[":
            # Look for the closing bracket. A leading "!" (negation) and a "]" directly after the opening
            # bracket (or after the "!") belong to the set and do not terminate it.
            j = i
            if j < n and pat[j] == "!":
                j += 1
            if j < n and pat[j] == "]":
                j += 1
            while j < n and pat[j] != "]":
                j += 1

            if j >= n:
                # no closing bracket: treat "[" as a literal character
                parts.append("\\[")
            else:
                stuff = pat[i:j].replace("\\", "\\\\")
                i = j + 1
                if stuff[0] == "!":
                    # shell-style negation -> regex negation
                    stuff = "^" + stuff[1:]
                elif stuff[0] == "^":
                    # a leading "^" is literal in shell patterns, so escape it for the regex
                    stuff = "\\" + stuff
                parts.append("[%s]" % stuff)
        else:
            parts.append(re.escape(c))

    return "(?ms)" + "".join(parts) + match_end
class FilePathInfoAsync(QThread):
    """Thread that computes size and file count of a path, honoring exclude patterns.

    When the thread has run, ``signal`` is emitted with the path, the size and
    the file count, the latter two converted to strings.
    """

    # Emitted at the end of run() with (path, size, files_count), all as str.
    signal = pyqtSignal(str, str, str)

    def __init__(self, path, exclude_patterns_str=None):
        """Store the path and prepare the exclude patterns.

        :param path: path whose size and file count are to be determined
        :param exclude_patterns_str: shell-style exclude patterns, one per line;
            ``None`` or an empty string means that nothing is excluded
        """
        self.path = path
        QThread.__init__(self)
        self.exiting = False

        # One pattern per line; surrounding whitespace is stripped and blank lines are dropped.
        self.exclude_patterns = [
            line.strip()
            for line in (exclude_patterns_str or '').splitlines()
            if line.strip() != ''
        ]

        # translate exclude patterns to regular expressions
        # (empty match_end: the generated regex is not anchored at the end of the string)
        self.exclude_patterns_re = [
            pattern_to_regex(pattern, '')
            for pattern in self.exclude_patterns
        ]

    def run(self):
        """Compute size and file count for ``self.path`` and emit the result via ``signal``."""
        # logger.info("running thread to get path=%s...", self.path)
        self.files_count = 0
        self.size, self.files_count = get_path_datasize(
            self.path,
            self.exclude_patterns_re
        )
        self.signal.emit(self.path, str(self.size), str(self.files_count))
def get_directory_size(dir_path):
def get_directory_size(dir_path, exclude_patterns_re):
''' Get number of files only and total size in bytes from a path.
Based off https://stackoverflow.com/a/17936789 '''
data_size = 0
data_size_filtered = 0
seen = set()
seen_filtered = set()
for curr_path, _, file_names in os.walk(dir_path):
for file_name in file_names:
@ -59,17 +130,25 @@ def get_directory_size(dir_path):
if os.path.islink(file_path):
continue
is_excluded = False
for pattern in exclude_patterns_re:
if re.match(pattern, file_path) is not None:
is_excluded = True
break
try:
stat = os.stat(file_path)
if stat.st_ino not in seen: # Visit each file only once
seen.add(stat.st_ino)
data_size += stat.st_size
if not is_excluded:
data_size_filtered += stat.st_size
seen_filtered.add(stat.st_ino)
except (FileNotFoundError, PermissionError):
continue
files_count = len(seen)
files_count_filtered = len(seen_filtered)
return data_size, files_count
return data_size_filtered, files_count_filtered
def get_network_status_monitor():
@ -80,12 +159,15 @@ def get_network_status_monitor():
return _network_status_monitor
def get_path_datasize(path):
def get_path_datasize(path, exclude_patterns_re):
file_info = QFileInfo(path)
data_size = 0
if file_info.isDir():
data_size, files_count = get_directory_size(file_info.absoluteFilePath())
data_size, files_count = get_directory_size(
file_info.absoluteFilePath(),
exclude_patterns_re
)
# logger.info("path (folder) %s %u elements size now=%u (%s)",
# file_info.absoluteFilePath(), files_count, data_size, pretty_bytes(data_size))
else:

View File

@ -100,7 +100,7 @@ class SourceTab(SourceBase, SourceUI, BackupProfileMixin):
self.sourceFilesWidget.item(index_row, SourceColumn.Type).setText(self.tr("Calculating..."))
self.sourceFilesWidget.item(index_row, SourceColumn.Size).setText(self.tr("Calculating..."))
self.sourceFilesWidget.item(index_row, SourceColumn.FilesCount).setText(self.tr("Calculating..."))
getDir = FilePathInfoAsync(path)
getDir = FilePathInfoAsync(path, self.profile().exclude_patterns)
getDir.signal.connect(self.set_path_info)
getDir.setObjectName(path)
self.updateThreads.append(getDir) # this is ugly, is there a better way to keep the thread object?