Mirror of https://github.com/borgbackup/borg.git
synced 2025-01-30 19:21:17 +00:00
Initial cache redesign
This commit is contained in:
parent 32c815800e
commit cf7d12ba50

2 changed files with 28 additions and 33 deletions
@@ -61,18 +61,12 @@ def stats(self, cache):
         total_osize = 0
         total_csize = 0
         total_usize = 0
-        chunk_count = {}
         for item in self.items:
             if item['type'] == 'FILE':
                 total_osize += item['size']
-                for idx in item['chunks']:
-                    id = self.chunk_idx[idx]
-                    chunk_count.setdefault(id, 0)
-                    chunk_count[id] += 1
-        for id, c in chunk_count.items():
-            count, size = cache.chunkmap[id]
+        for id, size in self.chunks:
             total_csize += size
-            if c == count:
+            if self.cache.seen_chunk(id) == 1:
                 total_usize += size
         return dict(osize=total_osize, csize=total_csize, usize=total_usize)
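With the redesign, an archive carries a flat list of (id, size) chunk tuples and asks the cache how often each chunk is referenced; a chunk counts toward the unique size only when this archive is its sole owner. A minimal standalone sketch of that accounting (the free-function wrapper and callback shape are assumptions; the field names follow the hunk):

# Sketch: archive stats from a flat (id, size) chunk list plus cache refcounts.
def archive_stats(items, chunks, seen_chunk):
    total_osize = sum(i['size'] for i in items if i['type'] == 'FILE')  # original bytes
    total_csize = sum(size for id, size in chunks)                      # compressed bytes
    # Unique size: chunks whose cache refcount is exactly 1.
    total_usize = sum(size for id, size in chunks if seen_chunk(id) == 1)
    return dict(osize=total_osize, csize=total_csize, usize=total_usize)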
@@ -124,7 +118,6 @@ def verify(self):
             for chunk in item['chunks']:
                 id = self.chunk_idx[chunk]
                 data = self.store.get(NS_CHUNKS, id)
-                data = self.store.get(NS_CHUNKS, id)
                 cid = data[:32]
                 data = data[32:]
                 if (hashlib.sha256(data).digest() != cid):
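The verify loop relies on each stored chunk being self-checking: the first 32 bytes are the SHA-256 digest of the payload that follows (the compressed data, per add_chunk in the second file). A round-trip sketch of that framing:

import hashlib
import zlib

def pack_chunk(data):
    # Stored layout: sha256(compressed) || compressed
    compressed = zlib.compress(data)
    return hashlib.sha256(compressed).digest() + compressed

def verify_chunk(blob):
    cid, payload = blob[:32], blob[32:]
    return hashlib.sha256(payload).digest() == cid

assert verify_chunk(pack_chunk(b'some file data'))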
@@ -135,20 +128,17 @@ def verify(self):
 
     def delete(self, cache):
         self.store.delete(NS_ARCHIVES, self.cache.archives[self.name])
-        for item in self.items:
-            if item['type'] == 'FILE':
-                for c in item['chunks']:
-                    id = self.chunk_idx[c]
-                    cache.chunk_decref(id)
+        for id, size in self.chunks:
+            cache.chunk_decref(id)
         self.store.commit()
         del cache.archives[self.name]
         cache.save()
 
-    def walk(self, path):
+    def _walk(self, path):
         st = os.lstat(path)
         if stat.S_ISDIR(st.st_mode):
             for f in os.listdir(path):
-                for x in self.walk(os.path.join(path, f)):
+                for x in self._walk(os.path.join(path, f)):
                     yield x
         else:
             yield path, st
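The renamed _walk is a plain recursive generator: directories are expanded depth-first and everything else is yielded with its lstat result. Extracted as a free function it looks like this (a sketch; the code predates "yield from", hence the inner re-yield loop):

import os
import stat

def walk(path):
    st = os.lstat(path)
    if stat.S_ISDIR(st.st_mode):
        for f in os.listdir(path):
            # Recurse and re-yield each (path, stat) pair.
            for x in walk(os.path.join(path, f)):
                yield x
    else:
        yield path, st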
@@ -157,7 +147,7 @@ def create(self, name, paths, cache):
         if name in cache.archives:
             raise NameError('Archive already exists')
         for path in paths:
-            for path, st in self.walk(unicode(path)):
+            for path, st in self._walk(unicode(path)):
                 if stat.S_ISDIR(st.st_mode):
                     self.process_dir(path, st)
                 elif stat.S_ISLNK(st.st_mode):
@@ -193,8 +183,8 @@ def process_file(self, path, st):
         chunks = []
         size = 0
         for chunk in chunkify(fd, CHUNK_SIZE, 30):
+            chunks.append(self.process_chunk(chunk))
             size += len(chunk)
-            chunks.append(self.add_chunk(*self.cache.add_chunk(chunk)))
         self.items.append({
             'type': 'FILE', 'path': path, 'chunks': chunks, 'size': size,
             'mode': st.st_mode,
@@ -203,5 +193,16 @@ def process_file(self, path, st):
             'ctime': st.st_ctime, 'mtime': st.st_mtime,
         })
 
+    def process_chunk(self, data):
+        id = hashlib.sha256(data).digest()
+        try:
+            return self.chunk_idx[id]
+        except KeyError:
+            idx = len(self.chunks)
+            size = self.cache.add_chunk(id, data)
+            self.chunks.append((id, size))
+            self.chunk_idx[idx] = id
+            return idx
+
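The new process_chunk gives each archive a local chunk table: a chunk's SHA-256 id is looked up in an index, unseen chunks are handed to the cache, and items end up storing small integer indexes instead of repeated 32-byte ids. A self-contained sketch of the same bookkeeping, assuming the index maps id to position (the hunk itself records the mapping in the other direction):

import hashlib

class ChunkTable:
    """Sketch: deduplicate chunks by SHA-256 id, handing out integer indexes."""
    def __init__(self, cache_add_chunk):
        self.cache_add_chunk = cache_add_chunk  # callback: (id, data) -> stored size
        self.chunks = []                        # position -> (id, size)
        self.chunk_idx = {}                     # id -> position (assumed direction)

    def process_chunk(self, data):
        id = hashlib.sha256(data).digest()
        try:
            return self.chunk_idx[id]
        except KeyError:
            idx = len(self.chunks)
            size = self.cache_add_chunk(id, data)
            self.chunks.append((id, size))
            self.chunk_idx[id] = idx
            return idx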
@@ -49,18 +49,14 @@ def init(self):
             raise Exception('Archive hash did not match')
         archive = msgpack.unpackb(zlib.decompress(data))
         self.archives[archive['name']] = id
-        for item in archive['items']:
-            if item['type'] != 'FILE':
-                continue
-            for idx in item['chunks']:
-                id, size = archive['chunks'][idx]
-                if self.seen_chunk(id):
-                    self.chunk_incref(id)
-                else:
-                    self.init_chunk(id, size)
+        for id, size in archive['chunks']:
+            try:
+                count, size = self.chunkmap[id]
+                self.chunkmap[id] = count + 1, size
+            except KeyError:
+                self.chunkmap[id] = 1, size
         self.save()
-
 
     def save(self):
         assert self.store.state == self.store.OPEN
         data = {'uuid': self.store.uuid,
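init now rebuilds reference counts in one pass straight from each archive's flat chunk list: one chunkmap entry per id, holding (count, size). A minimal sketch over already-decoded archive dicts (layout as in the hunk):

def build_chunkmap(archives):
    chunkmap = {}  # id -> (reference count, stored size)
    for archive in archives:
        for id, size in archive['chunks']:
            count, _ = chunkmap.get(id, (0, size))
            chunkmap[id] = (count + 1, size)
    return chunkmap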
@@ -74,16 +70,14 @@ def save(self):
         id = hashlib.sha256(data).digest()
         fd.write(id + data)
 
-    def add_chunk(self, data):
-        id = hashlib.sha256(data).digest()
+    def add_chunk(self, id, data):
         if self.seen_chunk(id):
             return self.chunk_incref(id)
-        osize = len(data)
         data = zlib.compress(data)
         data = hashlib.sha256(data).digest() + data
         csize = len(data)
         self.store.put(NS_CHUNKS, id, data)
-        return self.init_chunk(id, csize)
+        return self.init_chunk(id, csize)[1]
 
     def init_chunk(self, id, size):
         self.chunkmap[id] = (1, size)
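add_chunk now takes the id from the caller (the archive has already hashed the data) and returns just the stored size: a refcount bump for a seen chunk, otherwise compress, frame, write, and start the count at 1. A runnable sketch against an in-memory store (the dict store and the NS_CHUNKS stand-in are assumptions):

import hashlib
import zlib

NS_CHUNKS = 0   # stand-in namespace key
store = {}      # stand-in for the real store
chunkmap = {}   # id -> (count, size)

def add_chunk(id, data):
    if id in chunkmap:                    # seen_chunk
        count, size = chunkmap[id]
        chunkmap[id] = (count + 1, size)  # chunk_incref
        return size
    data = zlib.compress(data)
    data = hashlib.sha256(data).digest() + data  # self-checking frame
    csize = len(data)
    store[(NS_CHUNKS, id)] = data
    chunkmap[id] = (1, csize)             # init_chunk
    return csize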
@@ -96,7 +90,7 @@ def seen_chunk(self, id):
     def chunk_incref(self, id):
         count, size = self.chunkmap[id]
         self.chunkmap[id] = (count + 1, size)
-        return id, size
+        return size
 
     def chunk_decref(self, id):
         count, size = self.chunkmap[id]
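chunk_decref is cut off at the hunk boundary. A plausible completion under the same layout, reusing the store and chunkmap from the add_chunk sketch above and assuming the last reference also deletes the stored chunk (not shown in the diff):

def chunk_decref(id):
    count, size = chunkmap[id]
    if count == 1:
        # Assumed: last reference gone, drop the map entry and the stored chunk.
        del chunkmap[id]
        store.pop((NS_CHUNKS, id), None)
    else:
        chunkmap[id] = (count - 1, size)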