repository.scan: fix start_offset

start_offset (from state) is only valid for start_segment; for any other segment we must use 0.
2025-02-25 23:43:49 +00:00 · 2022-09-20 21:25:36 +02:00 · 2022-09-20 21:25:36 +02:00 · b3c7d8f5f3
commit b3c7d8f5f3
parent 63f736be4f
1 changed file with 5 additions and 1 deletion
--- a/src/borg/repository.py
+++ b/src/borg/repository.py
@@ -1235,6 +1235,10 @@ def scan(self, limit=None, state=None):
        # we only scan up to end_segment == transaction_id to only scan **committed** chunks,
        # avoiding scanning into newly written chunks.
        for segment, filename in self.io.segment_iterator(start_segment, end_segment):
+            # the start_offset we potentially got from state is only valid for the start_segment we also got
+            # from there. in case the segment file vanished meanwhile, the segment_iterator might never
+            # return a segment/filename corresponding to the start_segment and we must start from offset 0 then.
+            start_offset = start_offset if segment == start_segment else 0
            obj_iterator = self.io.iter_objects(segment, start_offset, read_data=False)
            while True:
                try:
@@ -1244,7 +1248,7 @@ def scan(self, limit=None, state=None):
                    # higher offsets than one that has an error in the header fields.
                    break
                if start_offset > 0:
-                    # we are using a marker and the marker points to the last object we have already
+                    # we are using a state != None and it points to the last object we have already
                    # returned in the previous scan() call - thus, we need to skip this one object.
                    # also, for the next segment, we need to start at offset 0.
                    start_offset = 0