1
0
Fork 0
mirror of https://github.com/borgbackup/borg.git synced 2025-03-09 21:57:24 +00:00

hashindex: add NSIndex1 write support, fix iteritems

To get the unit tests working, we need write support for the
borg 1.x format.

iteritems now supports the marker kwarg: if given, only the entries following the entry whose key equals marker are yielded.
This commit is contained in:
Thomas Waldmann 2024-10-25 18:02:56 +02:00
parent 64bda1a636
commit 7fffb0dbb1
No known key found for this signature in database
GPG key ID: 243ACFA951F78E01

View file

@ -116,11 +116,16 @@ NSIndex1Entry = namedtuple('NSIndex1bEntry', 'segment offset')
class NSIndex1: # legacy borg 1.x class NSIndex1: # legacy borg 1.x
MAX_VALUE = 2**32 - 1 # borghash has the full uint32_t range MAX_VALUE = 2**32 - 1 # borghash has the full uint32_t range
MAGIC = b"BORG_IDX" # borg 1.x
HEADER_FMT = "<8sIIBB" # magic, entries, buckets, ksize, vsize
VALUE_FMT = "<II" # borg 1.x on-disk: little-endian segment, offset
KEY_SIZE = 32
VALUE_SIZE = 8
def __init__(self, capacity=1000, path=None, permit_compact=False, usable=None): def __init__(self, capacity=1000, path=None, permit_compact=False, usable=None):
if usable is not None: if usable is not None:
capacity = usable * 2 # load factor 0.5 capacity = usable * 2 # load factor 0.5
self.ht = _borghash.HashTableNT(key_size=32, value_format="<II", namedtuple_type=NSIndex1Entry, capacity=capacity) self.ht = _borghash.HashTableNT(key_size=self.KEY_SIZE, value_format=self.VALUE_FMT, namedtuple_type=NSIndex1Entry, capacity=capacity)
if path: if path:
self._read(path) self._read(path)
@ -155,8 +160,13 @@ class NSIndex1: # legacy borg 1.x
return default return default
raise raise
def iteritems(self): def iteritems(self, marker=None):
yield from self.ht.iteritems() do_yield = marker is None
for key, value in self.ht.iteritems():
if do_yield:
yield key, value
else:
do_yield = key == marker
def compact(self): def compact(self):
return 0 return 0
@ -168,12 +178,16 @@ class NSIndex1: # legacy borg 1.x
def read(cls, path, permit_compact=False): def read(cls, path, permit_compact=False):
return cls(path=path) return cls(path=path)
def write(self, path):
self.ht.write(path) # only for unit tests
def size(self): def size(self):
return self.ht.size() # not quite correct as this is not the on-disk read-only format. return self.ht.size() # not quite correct as this is not the on-disk read-only format.
def write(self, path):
if isinstance(path, str):
with open(path, 'wb') as fd:
self._write_fd(fd)
else:
self._write_fd(path)
def _read(self, path): def _read(self, path):
if isinstance(path, str): if isinstance(path, str):
with open(path, 'rb') as fd: with open(path, 'rb') as fd:
@ -181,27 +195,28 @@ class NSIndex1: # legacy borg 1.x
else: else:
self._read_fd(path) self._read_fd(path)
def _read_fd(self, fd): def _write_fd(self, fd):
magic = fd.read(8) used = len(self.ht)
fd.seek(0) header_bytes = struct.pack(self.HEADER_FMT, self.MAGIC, used, used, self.KEY_SIZE, self.VALUE_SIZE)
if magic == b"BORG_IDX": # used for borg transfer borg 1.x -> borg 2 fd.write(header_bytes)
self._read_fd_borg1(fd) count = 0
if magic == b"BORGHASH": # only for unit tests for key, _ in self.ht.iteritems():
self.ht = _borghash.HashTableNT.read(fd) value = self.ht._get_raw(key)
fd.write(key)
fd.write(value)
count += 1
assert count == used
def _read_fd_borg1(self, fd): def _read_fd(self, fd):
MAGIC = b"BORG_IDX" # borg 1.x header_size = struct.calcsize(self.HEADER_FMT)
HEADER_FMT = "<8sIIBB" # magic, entries, buckets, ksize, vsize
header_size = struct.calcsize(HEADER_FMT)
header_bytes = fd.read(header_size) header_bytes = fd.read(header_size)
if len(header_bytes) < header_size: if len(header_bytes) < header_size:
raise ValueError(f"Invalid file, file is too short (header).") raise ValueError(f"Invalid file, file is too short (header).")
magic, entries, buckets, ksize, vsize = struct.unpack(HEADER_FMT, header_bytes) magic, entries, buckets, ksize, vsize = struct.unpack(self.HEADER_FMT, header_bytes)
if magic != MAGIC: if magic != self.MAGIC:
raise ValueError(f"Invalid file, magic {MAGIC.decode()} not found.") raise ValueError(f"Invalid file, magic {self.MAGIC.decode()} not found.")
VALUE_FMT = "<II" # borg 1.x on-disk: little-endian segment, offset assert ksize == self.KEY_SIZE, "invalid key size"
assert ksize == 32, "invalid key size" assert vsize == self.VALUE_SIZE, "invalid value size"
assert vsize == struct.calcsize(VALUE_FMT), "invalid value size"
buckets_size = buckets * (ksize + vsize) buckets_size = buckets * (ksize + vsize)
current_pos = fd.tell() current_pos = fd.tell()
end_of_file = fd.seek(0, os.SEEK_END) end_of_file = fd.seek(0, os.SEEK_END)