mirror of
https://github.com/borgbackup/borg.git
synced 2025-03-10 06:03:38 +00:00
Merge pull request #7606 from nain-F49FF806/data-scandir-unify
unify scanning and listing of segment dirs, files and apply good practices
This commit is contained in:
commit
912dc81a2d
2 changed files with 38 additions and 12 deletions
|
@ -62,6 +62,12 @@ LIST_SCAN_LIMIT = 100000
|
|||
|
||||
DEFAULT_SEGMENTS_PER_DIR = 1000
|
||||
|
||||
# Inclusive lower and upper bounds on segment / segment_dir indexes
|
||||
MIN_SEGMENT_INDEX = 0
|
||||
MAX_SEGMENT_INDEX = 2**32 - 1
|
||||
MIN_SEGMENT_DIR_INDEX = 0
|
||||
MAX_SEGMENT_DIR_INDEX = 2**32 - 1
|
||||
|
||||
FD_MAX_AGE = 4 * 60 # 4 minutes
|
||||
|
||||
CHUNK_MIN_EXP = 19 # 2**19 == 512kiB
|
||||
|
|
|
@ -1319,28 +1319,48 @@ class LoggedIO:
|
|||
safe_fadvise(fd.fileno(), 0, 0, 'DONTNEED')
|
||||
fd.close()
|
||||
|
||||
def get_segment_dirs(self, data_dir, start_index=MIN_SEGMENT_DIR_INDEX, end_index=MAX_SEGMENT_DIR_INDEX):
    """Return a generator yielding the requested segment dirs in data_dir as `os.DirEntry` objects.

    Only directories whose name is a decimal number n with
    start_index <= n <= end_index are yielded (start and end are inclusive).
    Entries come in whatever order `os.scandir` produces them, so callers
    that need a specific order must sort the result themselves.
    """
    # str.isdecimal() is used instead of str.isdigit() on purpose: isdigit() also
    # accepts characters such as superscript digits ("²") that int() rejects with
    # ValueError, so a stray entry with such a name would abort the whole scan
    # instead of simply being skipped.
    segment_dirs = (
        f
        for f in os.scandir(data_dir)
        if f.is_dir() and f.name.isdecimal() and start_index <= int(f.name) <= end_index
    )
    return segment_dirs
|
||||
|
||||
def get_segment_files(self, segment_dir, start_index=MIN_SEGMENT_INDEX, end_index=MAX_SEGMENT_INDEX):
    """Return a generator yielding the requested segment files in segment_dir as `os.DirEntry` objects.

    Only files whose name is a decimal number n with
    start_index <= n <= end_index are yielded (start and end are inclusive).
    Entries come in whatever order `os.scandir` produces them, so callers
    that need a specific order must sort the result themselves.
    """
    # str.isdecimal() is used instead of str.isdigit() on purpose: isdigit() also
    # accepts characters such as superscript digits ("²") that int() rejects with
    # ValueError, so a stray file with such a name would abort the whole scan
    # instead of simply being skipped.
    segment_files = (
        f
        for f in os.scandir(segment_dir)
        if f.is_file() and f.name.isdecimal() and start_index <= int(f.name) <= end_index
    )
    return segment_files
|
||||
|
||||
def segment_iterator(self, segment=None, reverse=False):
    """Yield `(segment_number, segment_path)` tuples for the segment files below `<self.path>/data`.

    If reverse is false, segments with a number >= segment are yielded in
    ascending order; if reverse is true, segments with a number <= segment
    are yielded in descending order. When segment is None, iteration starts
    at MIN_SEGMENT_INDEX (forward) or MAX_SEGMENT_INDEX (reverse).
    """
    if segment is None:
        segment = MIN_SEGMENT_INDEX if not reverse else MAX_SEGMENT_INDEX
    # Index of the directory holding the starting segment; used to bound the
    # directory scan on the starting side.
    start_segment_dir = segment // self.segments_per_dir
    data_path = os.path.join(self.path, 'data')
    if not reverse:
        segment_dirs = self.get_segment_dirs(data_path, start_index=start_segment_dir)
    else:
        segment_dirs = self.get_segment_dirs(data_path, end_index=start_segment_dir)
    # The scan helpers yield entries in arbitrary order, so sort numerically here.
    segment_dirs = sorted(segment_dirs, key=lambda entry: int(entry.name), reverse=reverse)
    for segment_dir in segment_dirs:
        if not reverse:
            segment_files = self.get_segment_files(segment_dir, start_index=segment)
        else:
            segment_files = self.get_segment_files(segment_dir, end_index=segment)
        segment_files = sorted(segment_files, key=lambda entry: int(entry.name), reverse=reverse)
        for segment_file in segment_files:
            # Note: Do not filter out logically deleted segments (see "File system interaction" above),
            # since this is used by cleanup and txn state detection as well.
            yield int(segment_file.name), segment_file.path
|
||||
|
||||
def get_latest_segment(self):
|
||||
for segment, filename in self.segment_iterator(reverse=True):
|
||||
|
|
Loading…
Add table
Reference in a new issue