From b64427c48081f298d354d6b301f1d19eba485dd0 Mon Sep 17 00:00:00 2001
From: Thomas Waldmann
Date: Thu, 8 Sep 2022 12:38:55 +0200
Subject: [PATCH] simplify: iter_objects always returns (..., size, data)

data might be None (if read_data is False).

also removed the include_data argument, not needed any more.
---
 src/borg/repository.py           | 29 +++++++++++++----------------
 src/borg/testsuite/repository.py |  6 +++---
 2 files changed, 16 insertions(+), 19 deletions(-)

diff --git a/src/borg/repository.py b/src/borg/repository.py
index 6ed1a2689..243db2a62 100644
--- a/src/borg/repository.py
+++ b/src/borg/repository.py
@@ -830,7 +830,7 @@ class Repository:
                     freeable_ratio * 100.0,
                     freeable_space,
                 )
-                for tag, key, offset, data in self.io.iter_objects(segment, include_data=True):
+                for tag, key, offset, _, data in self.io.iter_objects(segment):
                     if tag == TAG_COMMIT:
                         continue
                     in_index = self.index.get(key)
@@ -961,7 +961,7 @@ class Repository:
     def _update_index(self, segment, objects, report=None):
         """some code shared between replay_segments and check"""
         self.segments[segment] = 0
-        for tag, key, offset, size in objects:
+        for tag, key, offset, size, _ in objects:
             if tag in (TAG_PUT2, TAG_PUT):
                 try:
                     # If this PUT supersedes an older PUT, mark the old segment for compaction and count the free space
@@ -1011,7 +1011,7 @@ class Repository:
             return

         self.compact[segment] = 0
-        for tag, key, offset, size in self.io.iter_objects(segment, read_data=False):
+        for tag, key, offset, size, _ in self.io.iter_objects(segment, read_data=False):
             if tag in (TAG_PUT2, TAG_PUT):
                 in_index = self.index.get(key)
                 if not in_index or (in_index.segment, in_index.offset) != (segment, offset):
@@ -1165,8 +1165,8 @@ class Repository:
             if segment is not None and current_segment > segment:
                 break
             try:
-                for tag, key, current_offset, data in self.io.iter_objects(
-                    segment=current_segment, offset=offset or 0, include_data=True
+                for tag, key, current_offset, _, data in self.io.iter_objects(
+                    segment=current_segment, offset=offset or 0
                 ):
                     if offset is not None and current_offset > offset:
                         break
@@ -1229,10 +1229,10 @@ class Repository:
         start_segment, start_offset, _ = (0, 0, 0) if at_start else self.index[marker]
         result = []
         for segment, filename in self.io.segment_iterator(start_segment):
-            obj_iterator = self.io.iter_objects(segment, start_offset, read_data=False, include_data=False)
+            obj_iterator = self.io.iter_objects(segment, start_offset, read_data=False)
             while True:
                 try:
-                    tag, id, offset, size = next(obj_iterator)
+                    tag, id, offset, size, _ = next(obj_iterator)
                 except (StopIteration, IntegrityError):
                     # either end-of-segment or an error - we can not seek to objects at
                     # higher offsets than one that has an error in the header fields.
@@ -1458,7 +1458,7 @@ class LoggedIO:
         seen_commit = False
         while True:
             try:
-                tag, key, offset, _ = next(iterator)
+                tag, key, offset, _, _ = next(iterator)
             except IntegrityError:
                 return False
             except StopIteration:
@@ -1560,15 +1560,13 @@ class LoggedIO:
         fd.seek(0)
         return fd.read(MAGIC_LEN)

-    def iter_objects(self, segment, offset=0, include_data=False, read_data=True):
+    def iter_objects(self, segment, offset=0, read_data=True):
         """
         Return object iterator for *segment*.

-        If read_data is False then include_data must be False as well.
-
         See the _read() docstring about confidence in the returned data.

-        The iterator returns four-tuples of (tag, key, offset, data|size).
+        The iterator returns five-tuples of (tag, key, offset, size, data).
         """
         fd = self.get_fd(segment)
         fd.seek(offset)
@@ -1584,10 +1582,9 @@ class LoggedIO:
             size, tag, key, data = self._read(
                 fd, header, segment, offset, (TAG_PUT2, TAG_DELETE, TAG_COMMIT, TAG_PUT), read_data=read_data
             )
-            if include_data:
-                yield tag, key, offset, data
-            else:
-                yield tag, key, offset, size - header_size(tag)  # corresponds to len(data)
+            # tuple[3]: corresponds to len(data) == length of the full chunk payload (meta_len+enc_meta+enc_data)
+            # tuple[4]: data will be None if read_data is False.
+            yield tag, key, offset, size - header_size(tag), data
             assert size >= 0
             offset += size
             # we must get the fd via get_fd() here again as we yielded to our caller and it might
diff --git a/src/borg/testsuite/repository.py b/src/borg/testsuite/repository.py
index 45669ff69..ecb6000c7 100644
--- a/src/borg/testsuite/repository.py
+++ b/src/borg/testsuite/repository.py
@@ -60,7 +60,7 @@ class RepositoryTestCaseBase(BaseTestCase):
         H_trans[None] = -1  # key == None appears in commits
         tag_trans = {TAG_PUT2: "put2", TAG_PUT: "put", TAG_DELETE: "del", TAG_COMMIT: "comm"}
         for segment, fn in self.repository.io.segment_iterator():
-            for tag, key, offset, size in self.repository.io.iter_objects(segment):
+            for tag, key, offset, size, _ in self.repository.io.iter_objects(segment):
                 print("%s%s H(%d) -> %s[%d..+%d]" % (label, tag_trans[tag], H_trans[key], fn, offset, size))
         print()

@@ -372,7 +372,7 @@ class RepositoryCommitTestCase(RepositoryTestCaseBase):
         self.repo_dump("d1 cc")
         last_segment = self.repository.io.get_latest_segment() - 1
         num_deletes = 0
-        for tag, key, offset, size in self.repository.io.iter_objects(last_segment):
+        for tag, key, offset, size, _ in self.repository.io.iter_objects(last_segment):
             if tag == TAG_DELETE:
                 assert key == H(1)
                 num_deletes += 1
@@ -384,7 +384,7 @@ class RepositoryCommitTestCase(RepositoryTestCaseBase):
         assert last_segment not in self.repository.compact
         assert not self.repository.io.segment_exists(last_segment)
         for segment, _ in self.repository.io.segment_iterator():
-            for tag, key, offset, size in self.repository.io.iter_objects(segment):
+            for tag, key, offset, size, _ in self.repository.io.iter_objects(segment):
                 assert tag != TAG_DELETE
                 assert key != H(1)
         # after compaction, there should be no empty shadowed_segments lists left over.