Mirror of https://github.com/borgbackup/borg.git

Initial cache redesign

Jonas Borgström 2010-10-20 20:28:29 +02:00
parent 32c815800e
commit cf7d12ba50
2 changed files with 28 additions and 33 deletions

File 1 of 2:

@@ -61,18 +61,12 @@ def stats(self, cache):
         total_osize = 0
         total_csize = 0
         total_usize = 0
-        chunk_count = {}
         for item in self.items:
             if item['type'] == 'FILE':
                 total_osize += item['size']
-                for idx in item['chunks']:
-                    id = self.chunk_idx[idx]
-                    chunk_count.setdefault(id, 0)
-                    chunk_count[id] += 1
-        for id, c in chunk_count.items():
-            count, size = cache.chunkmap[id]
+        for id, size in self.chunks:
             total_csize += size
-            if c == count:
+            if self.cache.seen_chunk(id) == 1:
                 total_usize += size
         return dict(osize=total_osize, csize=total_csize, usize=total_usize)
@@ -124,7 +118,6 @@ def verify(self):
            for chunk in item['chunks']:
                id = self.chunk_idx[chunk]
-               data = self.store.get(NS_CHUNKS, id)
                data = self.store.get(NS_CHUNKS, id)
                cid = data[:32]
                data = data[32:]
                if (hashlib.sha256(data).digest() != cid):
@@ -135,20 +128,17 @@ def verify(self):
     def delete(self, cache):
         self.store.delete(NS_ARCHIVES, self.cache.archives[self.name])
-        for item in self.items:
-            if item['type'] == 'FILE':
-                for c in item['chunks']:
-                    id = self.chunk_idx[c]
-                    cache.chunk_decref(id)
+        for id, size in self.chunks:
+            cache.chunk_decref(id)
         self.store.commit()
         del cache.archives[self.name]
         cache.save()

-    def walk(self, path):
+    def _walk(self, path):
         st = os.lstat(path)
         if stat.S_ISDIR(st.st_mode):
             for f in os.listdir(path):
-                for x in self.walk(os.path.join(path, f)):
+                for x in self._walk(os.path.join(path, f)):
                     yield x
         else:
             yield path, st
@@ -157,7 +147,7 @@ def create(self, name, paths, cache):
         if name in cache.archives:
             raise NameError('Archive already exists')
         for path in paths:
-            for path, st in self.walk(unicode(path)):
+            for path, st in self._walk(unicode(path)):
                 if stat.S_ISDIR(st.st_mode):
                     self.process_dir(path, st)
                 elif stat.S_ISLNK(st.st_mode):
@@ -193,8 +183,8 @@ def process_file(self, path, st):
         chunks = []
         size = 0
         for chunk in chunkify(fd, CHUNK_SIZE, 30):
-            chunks.append(self.add_chunk(*self.cache.add_chunk(chunk)))
+            chunks.append(self.process_chunk(chunk))
             size += len(chunk)
         self.items.append({
             'type': 'FILE', 'path': path, 'chunks': chunks, 'size': size,
             'mode': st.st_mode,
@@ -203,5 +193,16 @@ def process_file(self, path, st):
             'ctime': st.st_ctime, 'mtime': st.st_mtime,
         })
+
+    def process_chunk(self, data):
+        id = hashlib.sha256(data).digest()
+        try:
+            return self.chunk_idx[id]
+        except KeyError:
+            idx = len(self.chunks)
+            size = self.cache.add_chunk(id, data)
+            self.chunks.append((id, size))
+            self.chunk_idx[id] = idx
+            return idx
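
Taken together, the archive-side changes replace per-item chunk counting with a single self.chunks list that names each distinct chunk once as an (id, size) pair, plus a chunk_idx map from chunk id to list index, so file items reference chunks by small integer indexes. A minimal, self-contained sketch of that scheme (a hypothetical class, not the project's real one; a matching Cache sketch follows the second file's diff):

import hashlib

class Archive:
    def __init__(self, cache):
        self.cache = cache    # anything with add_chunk(id, data) -> size
        self.chunks = []      # (id, size), one entry per distinct chunk
        self.chunk_idx = {}   # chunk id -> index into self.chunks

    def process_chunk(self, data):
        id = hashlib.sha256(data).digest()
        try:
            # Chunk already referenced by this archive: reuse its index.
            return self.chunk_idx[id]
        except KeyError:
            idx = len(self.chunks)
            # The cache deduplicates across archives and returns the
            # size it accounted for the stored chunk.
            size = self.cache.add_chunk(id, data)
            self.chunks.append((id, size))
            self.chunk_idx[id] = idx
            return idx

Because archive['chunks'] carries each (id, size) pair exactly once, the rewritten stats() and Cache.init() can iterate it directly instead of re-counting chunk references item by item.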

File 2 of 2:

@@ -49,18 +49,14 @@ def init(self):
                raise Exception('Archive hash did not match')
            archive = msgpack.unpackb(zlib.decompress(data))
            self.archives[archive['name']] = id
-           for item in archive['items']:
-               if item['type'] != 'FILE':
-                   continue
-               for idx in item['chunks']:
-                   id, size = archive['chunks'][idx]
-                   if self.seen_chunk(id):
-                       self.chunk_incref(id)
-                   else:
-                       self.init_chunk(id, size)
+           for id, size in archive['chunks']:
+               try:
+                   count, size = self.chunkmap[id]
+                   self.chunkmap[id] = count + 1, size
+               except KeyError:
+                   self.chunkmap[id] = 1, size
        self.save()

    def save(self):
        assert self.store.state == self.store.OPEN
        data = {'uuid': self.store.uuid,
@@ -74,16 +70,14 @@ def save(self):
        id = hashlib.sha256(data).digest()
        fd.write(id + data)

-   def add_chunk(self, data):
-       id = hashlib.sha256(data).digest()
+   def add_chunk(self, id, data):
        if self.seen_chunk(id):
            return self.chunk_incref(id)
        osize = len(data)
        data = zlib.compress(data)
        data = hashlib.sha256(data).digest() + data
        csize = len(data)
        self.store.put(NS_CHUNKS, id, data)
-       return self.init_chunk(id, csize)
+       return self.init_chunk(id, csize)[1]

    def init_chunk(self, id, size):
        self.chunkmap[id] = (1, size)
@@ -96,7 +90,7 @@ def seen_chunk(self, id):
    def chunk_incref(self, id):
        count, size = self.chunkmap[id]
        self.chunkmap[id] = (count + 1, size)
-       return id, size
+       return size

    def chunk_decref(self, id):
        count, size = self.chunkmap[id]
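
On the cache side, chunkmap maps each chunk id to a (refcount, stored size) pair: add_chunk stores a chunk the first time it is seen and bumps the refcount otherwise, while chunk_decref releases a reference. A minimal sketch of that bookkeeping (hypothetical: a plain dict stands in for store.put(NS_CHUNKS, id, data), and the drop-on-last-reference behaviour in chunk_decref is an assumption, since the diff cuts off before its body):

import hashlib
import zlib

class Cache:
    def __init__(self):
        self.chunkmap = {}    # chunk id -> (refcount, stored size)
        self.store = {}       # stand-in for the real chunk store

    def add_chunk(self, id, data):
        if self.seen_chunk(id):
            return self.chunk_incref(id)
        data = zlib.compress(data)
        # Prefix the compressed payload with its hash, as verify() expects.
        data = hashlib.sha256(data).digest() + data
        self.store[id] = data
        self.chunkmap[id] = (1, len(data))
        return len(data)

    def seen_chunk(self, id):
        # Returns the current reference count (0 if the chunk is unknown).
        return self.chunkmap.get(id, (0, 0))[0]

    def chunk_incref(self, id):
        count, size = self.chunkmap[id]
        self.chunkmap[id] = (count + 1, size)
        return size

    def chunk_decref(self, id):
        count, size = self.chunkmap[id]
        if count == 1:
            del self.chunkmap[id]    # assumed: last reference frees the chunk
            del self.store[id]
        else:
            self.chunkmap[id] = (count - 1, size)

cache = Cache()
id = hashlib.sha256(b'some chunk').digest()
assert cache.add_chunk(id, b'some chunk') == cache.add_chunk(id, b'some chunk')
assert cache.seen_chunk(id) == 2

Because seen_chunk returns the reference count, the rewritten stats() attributes a chunk's size to usize exactly when seen_chunk(id) == 1, i.e. when the archive being measured is that chunk's only user.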