mirror of https://github.com/borgbackup/borg.git
bugfix: remove superfluous repository.list() call
Because the loop only ended when .list() returned an empty result, it always needed one more call than necessary. We can also detect that we are finished when .list() returns fewer results than the limit we gave it. Also: reduce code duplication by factoring the loop into the repo_lister() generator function.
parent 7d02fe2b8f · commit 1436bbba1a
5 changed files with 71 additions and 91 deletions
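To make the saved round trip concrete, here is a minimal sketch (not part of the commit; FakeRepository and all numbers are invented for illustration) comparing the old and the new termination logic:

class FakeRepository:
    """Holds n objects; .list() pages through them like the repository API above."""

    def __init__(self, n):
        # (id, stored_size) pairs with unique 32-byte ids
        self.objects = [(i.to_bytes(32, "big"), 100) for i in range(n)]
        self.calls = 0  # number of .list() requests made

    def list(self, *, limit, marker=None):
        self.calls += 1
        start = 0 if marker is None else int.from_bytes(marker, "big") + 1
        return self.objects[start : start + limit]


def old_lister(repository, *, limit):
    # old logic: stop only once .list() returns an empty result
    marker = None
    while True:
        result = repository.list(limit=limit, marker=marker)
        if not result:
            break
        marker = result[-1][0]
        yield from result


def new_lister(repository, *, limit):
    # new logic (same as repo_lister below): a short page already means we are finished
    marker = None
    finished = False
    while not finished:
        result = repository.list(limit=limit, marker=marker)
        finished = (len(result) < limit) if limit is not None else (len(result) == 0)
        if not finished:
            marker = result[-1][0]
        yield from result


for lister in (old_lister, new_lister):
    repo = FakeRepository(2500)
    assert sum(1 for _ in lister(repo, limit=1000)) == 2500
    print(lister.__name__, repo.calls)  # old_lister: 4 requests, new_lister: 3 requests

Only when the object count is an exact multiple of the limit is the last full page inconclusive, so that both variants need a final empty .list() call; in every other case the new logic saves one request.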
@@ -10,7 +10,7 @@
 from ..helpers import ProgressIndicatorPercent
 from ..manifest import Manifest
 from ..remote import RemoteRepository
-from ..repository import Repository
+from ..repository import Repository, repo_lister
 
 from ..logger import create_logger
 

@@ -49,13 +49,7 @@ def garbage_collect(self):
     def get_repository_chunks(self) -> ChunkIndex:
         """Build a dict id -> size of all chunks present in the repository"""
         chunks = ChunkIndex()
-        marker = None
-        while True:
-            result = self.repository.list(limit=LIST_SCAN_LIMIT, marker=marker)
-            if not result:
-                break
-            marker = result[-1][0]
-            for id, stored_size in result:
+        for id, stored_size in repo_lister(self.repository, limit=LIST_SCAN_LIMIT):
             # we add this id to the chunks index, using refcount == 0, because
             # we do not know yet whether it is actually referenced from some archives.
             # we "abuse" the size field here. usually there is the plaintext size,
@@ -15,7 +15,7 @@
 from ..helpers import CommandError, RTError
 from ..manifest import Manifest
 from ..platform import get_process_id
-from ..repository import Repository, LIST_SCAN_LIMIT
+from ..repository import Repository, LIST_SCAN_LIMIT, repo_lister
 from ..repoobj import RepoObj
 
 from ._common import with_repository, Highlander

@@ -130,13 +130,7 @@ def decrypt_dump(id, cdata):
         cdata = repository.get(id)
         key = key_factory(repository, cdata)
         repo_objs = RepoObj(key)
-        marker = None
-        while True:
-            result = repository.list(limit=LIST_SCAN_LIMIT, marker=marker)
-            if not result:
-                break
-            marker = result[-1][0]
-            for id, stored_size in result:
+        for id, stored_size in repo_lister(repository, limit=LIST_SCAN_LIMIT):
             cdata = repository.get(id)
             decrypt_dump(id, cdata)
         print("Done.")

@@ -177,16 +171,10 @@ def print_finding(info, wanted, data, offset):
         key = key_factory(repository, cdata)
         repo_objs = RepoObj(key)
 
-        marker = None
         last_data = b""
         last_id = None
         i = 0
-        while True:
-            result = repository.list(limit=LIST_SCAN_LIMIT, marker=marker)
-            if not result:
-                break
-            marker = result[-1][0]
-            for id, stored_size in result:
+        for id, stored_size in repo_lister(repository, limit=LIST_SCAN_LIMIT):
             cdata = repository.get(id)
             _, data = repo_objs.parse(id, cdata, ro_type=ROBJ_DONTCARE)
 
@@ -31,7 +31,7 @@
 from .manifest import Manifest
 from .platform import SaveFile
 from .remote import RemoteRepository
-from .repository import LIST_SCAN_LIMIT, Repository, StoreObjectNotFound
+from .repository import LIST_SCAN_LIMIT, Repository, StoreObjectNotFound, repo_lister
 
 # chunks is a list of ChunkListEntry
 FileCacheEntry = namedtuple("FileCacheEntry", "age inode size ctime mtime chunks")

@@ -680,20 +680,12 @@ def build_chunkindex_from_repo(repository, *, disable_caches=False, cache_immedi
     logger.debug("querying the chunk IDs list from the repo...")
     chunks = ChunkIndex()
     t0 = perf_counter()
-    num_requests = 0
     num_chunks = 0
-    marker = None
-    while True:
-        result = repository.list(limit=LIST_SCAN_LIMIT, marker=marker)
-        num_requests += 1
-        if not result:
-            break
-        marker = result[-1][0]
     # The repo says it has these chunks, so we assume they are referenced chunks.
     # We do not care for refcounting anymore, so we just set refcount = MAX_VALUE.
     # We do not know the plaintext size (!= stored_size), thus we set size = 0.
     init_entry = ChunkIndexEntry(refcount=ChunkIndex.MAX_VALUE, size=0)
-    for id, stored_size in result:
+    for id, stored_size in repo_lister(repository, limit=LIST_SCAN_LIMIT):
         num_chunks += 1
         chunks[id] = init_entry
     # Cache does not contain the manifest.

@@ -703,7 +695,7 @@ def build_chunkindex_from_repo(repository, *, disable_caches=False, cache_immedi
     # Chunk IDs in a list are encoded in 34 bytes: 1 byte msgpack header, 1 byte length, 32 ID bytes.
     # Protocol overhead is neglected in this calculation.
     speed = format_file_size(num_chunks * 34 / duration)
-    logger.debug(f"queried {num_chunks} chunk IDs in {duration} s ({num_requests} requests), ~{speed}/s")
+    logger.debug(f"queried {num_chunks} chunk IDs in {duration} s, ~{speed}/s")
     if cache_immediately:
         # immediately update cache/chunks, so we only rarely have to do it the slow way:
         write_chunkindex_to_repo_cache(repository, chunks, compact=False, clear=False, force_write=True)
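A side note on the throughput figure in the last hunk: per the comment there, each listed chunk ID costs about 34 bytes on the wire, so the debug log estimates an effective listing speed. A worked example with invented numbers:

num_chunks, duration = 1_000_000, 2.0  # hypothetical: one million chunk IDs listed in 2 s
speed = num_chunks * 34 / duration     # 34 bytes/ID: 1 msgpack header + 1 length + 32 ID bytes
print(f"~{speed / 1e6:.0f} MB/s")      # -> ~17 MB/s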
@@ -18,6 +18,18 @@
 logger = create_logger(__name__)
 
 
+def repo_lister(repository, *, limit=None):
+    marker = None
+    finished = False
+    while not finished:
+        result = repository.list(limit=limit, marker=marker)
+        finished = (len(result) < limit) if limit is not None else (len(result) == 0)
+        if not finished:
+            marker = result[-1][0]
+        for id, stored_size in result:
+            yield id, stored_size
+
+
 class Repository:
     """borgstore based key value store"""
 
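Call sites then just iterate, with no marker bookkeeping; passing limit=None falls back to the old stop-on-empty-result behavior, since there is no page size to compare against. A small usage sketch (count_stored_bytes is a hypothetical helper, not from the commit; the import path follows the relative imports in the diffs above):

from borg.repository import LIST_SCAN_LIMIT, repo_lister


def count_stored_bytes(repository):
    """Sum the stored sizes of all objects, leaving the paging to repo_lister()."""
    return sum(stored_size for _, stored_size in repo_lister(repository, limit=LIST_SCAN_LIMIT))

Because repo_lister() is a generator, each page is yielded as it arrives and nothing beyond one .list() result is held in memory.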
@@ -1,7 +1,7 @@
 import os
 
 from ...constants import *  # NOQA
-from ...repository import Repository
+from ...repository import Repository, repo_lister
 from ...manifest import Manifest
 from ...compress import ZSTD, ZLIB, LZ4, CNONE
 from ...helpers import bin_to_hex

@@ -15,13 +15,7 @@ def check_compression(ctype, clevel, olevel):
     repository = Repository(archiver.repository_path, exclusive=True)
     with repository:
         manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
-        marker = None
-        while True:
-            result = repository.list(limit=LIST_SCAN_LIMIT, marker=marker)
-            if not result:
-                break
-            marker = result[-1][0]
-            for id, _ in result:
+        for id, _ in repo_lister(repository, limit=LIST_SCAN_LIMIT):
             chunk = repository.get(id, read_data=True)
             meta, data = manifest.repo_objs.parse(
                 id, chunk, ro_type=ROBJ_DONTCARE