From 9bad152a8006710f42e4bf95e7fca96a28b7ffa3 Mon Sep 17 00:00:00 2001 From: Frederic Brodbeck Date: Mon, 18 Oct 2021 13:41:37 +0200 Subject: [PATCH] Show backup size with excludes applied. By @freder (#961) --- src/vorta/utils.py | 102 ++++++++++++++++++++++++++++++---- src/vorta/views/source_tab.py | 2 +- 2 files changed, 93 insertions(+), 11 deletions(-) diff --git a/src/vorta/utils.py b/src/vorta/utils.py index 2ba95803..aff15242 100644 --- a/src/vorta/utils.py +++ b/src/vorta/utils.py @@ -30,26 +30,97 @@ borg_compat = BorgCompatibility() _network_status_monitor = None +# copied from https://github.com/borgbackup/borg/blob/master/src/borg/shellpattern.py +def pattern_to_regex(pat, match_end=r"\Z"): + """Translate a shell-style pattern to a regular expression. + The pattern may include ``**<sep>`` (<sep> stands for the platform-specific path separator; "/" on POSIX systems) + for matching zero or more directory levels and "*" for matching zero or more arbitrary characters with the exception + of any path separator. Wrap meta-characters in brackets for a literal match (i.e. "[?]" to match the literal + character "?"). + Using match_end=regex one can give a regular expression that is used to match after the regex that is generated from + the pattern. The default is to match the end of the string. + This function is derived from the "fnmatch" module distributed with the Python standard library. + Copyright (C) 2001-2016 Python Software Foundation. All rights reserved. 
+ TODO: support {alt1,alt2} shell-style alternatives + """ + sep = os.path.sep + n = len(pat) + i = 0 + res = "" + + while i < n: + c = pat[i] + i += 1 + + if c == "*": + if i + 1 < n and pat[i] == "*" and pat[i + 1] == sep: + # **/ == wildcard for 0+ full (relative) directory names with trailing slashes; the forward slash stands + # for the platform-specific path separator + res += r"(?:[^\%s]*\%s)*" % (sep, sep) + i += 2 + else: + # * == wildcard for name parts (does not cross path separator) + res += r"[^\%s]*" % sep + elif c == "?": + # ? == any single character excluding path separator + res += r"[^\%s]" % sep + elif c == "[": + j = i + if j < n and pat[j] == "!": + j += 1 + if j < n and pat[j] == "]": + j += 1 + while j < n and pat[j] != "]": + j += 1 + if j >= n: + res += "\\[" + else: + stuff = pat[i:j].replace("\\", "\\\\") + i = j + 1 + if stuff[0] == "!": + stuff = "^" + stuff[1:] + elif stuff[0] == "^": + stuff = "\\" + stuff + res += "[%s]" % stuff + else: + res += re.escape(c) + + return "(?ms)" + res + match_end + + class FilePathInfoAsync(QThread): signal = pyqtSignal(str, str, str) - def __init__(self, path): + def __init__(self, path, exclude_patterns_str): self.path = path QThread.__init__(self) self.exiting = False + self.exclude_patterns = [] + for _line in (exclude_patterns_str or '').splitlines(): + line = _line.strip() + if line != '': + self.exclude_patterns.append(line) + # translate exclude patterns to regular expressions + self.exclude_patterns_re = [ + pattern_to_regex(pattern, '') + for pattern in self.exclude_patterns + ] def run(self): # logger.info("running thread to get path=%s...", self.path) - self.files_count = 0 - self.size, self.files_count = get_path_datasize(self.path) + self.size, self.files_count = get_path_datasize( + self.path, + self.exclude_patterns_re + ) self.signal.emit(self.path, str(self.size), str(self.files_count)) -def get_directory_size(dir_path): +def get_directory_size(dir_path, exclude_patterns_re): ''' Get 
number of files only and total size in bytes from a path. Based off https://stackoverflow.com/a/17936789 ''' - data_size = 0 + data_size_filtered = 0 seen = set() + seen_filtered = set() for curr_path, _, file_names in os.walk(dir_path): for file_name in file_names: @@ -59,17 +130,25 @@ def get_directory_size(dir_path): if os.path.islink(file_path): continue + is_excluded = False + for pattern in exclude_patterns_re: + if re.match(pattern, file_path) is not None: + is_excluded = True + break + try: stat = os.stat(file_path) if stat.st_ino not in seen: # Visit each file only once seen.add(stat.st_ino) - data_size += stat.st_size + if not is_excluded: + data_size_filtered += stat.st_size + seen_filtered.add(stat.st_ino) except (FileNotFoundError, PermissionError): continue - files_count = len(seen) + files_count_filtered = len(seen_filtered) - return data_size, files_count + return data_size_filtered, files_count_filtered def get_network_status_monitor(): @@ -80,12 +159,15 @@ def get_network_status_monitor(): return _network_status_monitor -def get_path_datasize(path): +def get_path_datasize(path, exclude_patterns_re): file_info = QFileInfo(path) data_size = 0 if file_info.isDir(): - data_size, files_count = get_directory_size(file_info.absoluteFilePath()) + data_size, files_count = get_directory_size( + file_info.absoluteFilePath(), + exclude_patterns_re + ) # logger.info("path (folder) %s %u elements size now=%u (%s)", # file_info.absoluteFilePath(), files_count, data_size, pretty_bytes(data_size)) else: diff --git a/src/vorta/views/source_tab.py b/src/vorta/views/source_tab.py index 0332f367..c5e8a65f 100644 --- a/src/vorta/views/source_tab.py +++ b/src/vorta/views/source_tab.py @@ -100,7 +100,7 @@ class SourceTab(SourceBase, SourceUI, BackupProfileMixin): self.sourceFilesWidget.item(index_row, SourceColumn.Type).setText(self.tr("Calculating...")) self.sourceFilesWidget.item(index_row, SourceColumn.Size).setText(self.tr("Calculating...")) 
self.sourceFilesWidget.item(index_row, SourceColumn.FilesCount).setText(self.tr("Calculating...")) - getDir = FilePathInfoAsync(path) + getDir = FilePathInfoAsync(path, self.profile().exclude_patterns) getDir.signal.connect(self.set_path_info) getDir.setObjectName(path) self.updateThreads.append(getDir) # this is ugly, is there a better way to keep the thread object?