Mirror of https://github.com/borgbackup/borg.git, synced 2024-12-25 01:06:50 +00:00

Initial rough implementation of chunkification cache
parent 00a98082ab
commit e181829365
2 changed files with 59 additions and 16 deletions
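
This change teaches the backup path to skip re-reading and re-chunking files that have not changed since the last run: the cache remembers, per hashed path, the inode, size, mtime, and chunk ids of each file, and process_file only falls back to chunkify() when that record is missing, stale, or refers to chunks the store no longer knows. For orientation, here is a minimal standalone sketch of that control flow (id_hash, chunkify, and CHUNK_SIZE are simplified stand-ins, not the code from this diff):

import hashlib

CHUNK_SIZE = 64 * 1024   # illustrative; the real constant lives elsewhere

def id_hash(data):
    # stand-in for self.crypto.id_hash()
    return hashlib.sha256(data).digest()

def chunkify(fd, chunk_size):
    # stand-in for the real chunker: plain fixed-size blocks
    while True:
        block = fd.read(chunk_size)
        if not block:
            return
        yield block

def backup_file(path, st, cache):
    # mirrors the decision flow process_file() gains in this commit
    path_hash = id_hash(path.encode('utf-8'))
    ids, size = cache.file_known_and_unchanged(path_hash, st)
    if ids is not None and all(cache.seen_chunk(i) for i in ids):
        return ids, size                      # fast path: nothing re-read
    with open(path, 'rb') as fd:              # slow path: chunkify as before
        chunks = list(chunkify(fd, CHUNK_SIZE))
    ids = [id_hash(c) for c in chunks]
    cache.memorize_file_chunks(path_hash, st, ids)
    return ids, sum(len(c) for c in chunks)
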
@@ -243,6 +243,7 @@ def process_symlink(self, path, st):
         })
     def process_file(self, path, st, cache):
         safe_path = path.lstrip('/\\:')
         # Is it a hard link?
         if st.st_nlink > 1:
+            source = self.hard_links.get((st.st_ino, st.st_dev))
             if (st.st_ino, st.st_dev) in self.hard_links:

@@ -252,18 +253,34 @@ def process_file(self, path, st, cache):
                 return
             else:
                 self.hard_links[st.st_ino, st.st_dev] = safe_path
-        try:
-            fd = open(path, 'rb')
-        except IOError, e:
-            logging.error(e)
-            return
-        with fd:
-            logging.info(safe_path)
-            chunks = []
-            size = 0
-            for chunk in chunkify(fd, CHUNK_SIZE, 30):
-                chunks.append(self.process_chunk(chunk, cache))
-                size += len(chunk)
+        logging.info(safe_path)
+        path_hash = self.crypto.id_hash(path.encode('utf-8'))
+        ids, size = cache.file_known_and_unchanged(path_hash, st)
+        if ids is not None:
+            # Make sure all ids are available
+            for id in ids:
+                if not cache.seen_chunk(id):
+                    ids = None
+                    break
+            else:
+                chunks = [self.process_chunk2(id, cache) for id in ids]
+        # Only chunkify the file if needed
+        if ids is None:
+            try:
+                fd = open(path, 'rb')
+            except IOError, e:
+                logging.error(e)
+                return
+            with fd:
+                size = 0
+                ids = []
+                chunks = []
+                for chunk in chunkify(fd, CHUNK_SIZE, 30):
+                    ids.append(self.crypto.id_hash(chunk))
+                    chunks.append(chunk)
+                    size += len(chunk)
+            cache.memorize_file_chunks(path_hash, st, ids)
+            chunks = [self.process_chunk(chunk, cache) for chunk in chunks]
         self.items.append({
             'type': 'FILE', 'path': safe_path, 'chunks': chunks, 'size': size,
             'mode': st.st_mode,
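
The availability check above leans on Python's for/else: the else clause runs only when the loop finishes without break, so the cached ids are reused only if every single one is still present in the store. A tiny self-contained illustration of the idiom:

def all_seen(ids, seen):
    # mirrors the diff's loop: bail out on the first unknown id
    for i in ids:
        if i not in seen:
            break
    else:
        return True   # no break: every id was found
    return False

assert all_seen([1, 2], {1, 2, 3}) is True
assert all_seen([1, 4], {1, 2, 3}) is False
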
@@ -272,6 +289,16 @@ def process_file(self, path, st, cache):
             'ctime': st.st_ctime, 'mtime': st.st_mtime,
         })
 
+    def process_chunk2(self, id, cache):
+        try:
+            return self.chunk_idx[id]
+        except KeyError:
+            idx = len(self.chunks)
+            size = cache.chunk_incref(id)
+            self.chunks.append((id, size))
+            self.chunk_idx[id] = idx
+            return idx
+
     def process_chunk(self, data, cache):
         id = self.crypto.id_hash(data)
         try:
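
process_chunk2() handles the cache-hit case: the chunk already exists in the store, so instead of hashing and writing data (what process_chunk() does), it only bumps the chunk's reference count via cache.chunk_incref(id) and memoizes the id's position in the archive's chunk table. A rough sketch of the refcount bookkeeping this implies, with the chunkmap layout inferred from the chunk_decref hunk further down (the count == 1 branch is an assumption, not shown in this diff):

class ChunkRefcounts:
    # chunkmap: id -> (refcount, size), matching the Cache class below
    def __init__(self):
        self.chunkmap = {}

    def seen_chunk(self, id):
        return id in self.chunkmap

    def chunk_incref(self, id):
        count, size = self.chunkmap[id]
        self.chunkmap[id] = (count + 1, size)
        return size                 # caller records (id, size) in the archive

    def chunk_decref(self, id):
        count, size = self.chunkmap[id]
        if count == 1:
            del self.chunkmap[id]   # assumed: last reference gone
        else:
            self.chunkmap[id] = (count - 1, size)
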
@@ -14,6 +14,7 @@ def __init__(self, store, crypto):
         self.path = os.path.join(os.path.expanduser('~'), '.dedupestore', 'cache',
                                  '%s.cache' % self.store.uuid)
         self.tid = -1
+        self.file_chunks = {}
         self.open()
         if self.tid != self.store.tid:
             self.init(crypto)

@@ -22,13 +23,15 @@ def open(self):
         if not os.path.exists(self.path):
             return
         cache = msgpack.unpackb(open(self.path, 'rb').read())
-        version = cache.get('version')
-        if version != 1:
-            logging.error('Unsupported cache version %r' % version)
-            return
+        assert cache['version'] == 1
         if cache['store'] != self.store.uuid:
             raise Exception('Cache UUID mismatch')
         self.chunkmap = cache['chunkmap']
+        # Discard old file_chunks entries
+        for hash, entry in cache['file_chunks'].iteritems():
+            count = entry[0]
+            if count < 8:
+                self.file_chunks[hash] = [count + 1] + list(entry[1:])
         self.tid = cache['tid']
 
     def init(self, crypto):
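
The count < 8 filter implements a simple aging scheme: each time the cache is opened, every surviving entry's age is bumped by one, a successful lookup in file_known_and_unchanged() (below) resets it to zero, and entries that reach age 8 are dropped, so records for deleted files don't accumulate forever. Roughly (MAX_AGE is an illustrative name; the code uses the literal 8):

MAX_AGE = 8   # illustrative; entries unmatched for this many loads are evicted

def load_entries(stored):
    # stored: path_hash -> (age, inode, size, mtime, chunk ids)
    live = {}
    for path_hash, entry in stored.items():
        age = entry[0]
        if age < MAX_AGE:                     # keep it, one load older
            live[path_hash] = [age + 1] + list(entry[1:])
    return live                               # entries at MAX_AGE vanish
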
@@ -56,6 +59,7 @@ def save(self):
             'store': self.store.uuid,
             'chunkmap': self.chunkmap,
             'tid': self.store.tid,
+            'file_chunks': self.file_chunks,
         }
         data = msgpack.packb(cache)
         cachedir = os.path.dirname(self.path)

@@ -90,4 +94,16 @@ def chunk_decref(self, id):
         else:
             self.chunkmap[id] = (count - 1, size)
 
+    def file_known_and_unchanged(self, path_hash, st):
+        entry = self.file_chunks.get(path_hash)
+        if (entry and entry[1] == st.st_ino
+            and entry[2] == st.st_size and entry[3] == st.st_mtime):
+            entry[0] = 0  # reset entry age
+            return entry[4], entry[2]
+        else:
+            return None, 0
+
+    def memorize_file_chunks(self, path_hash, st, ids):
+        # Entry: Age, inode, size, mtime, chunk ids
+        self.file_chunks[path_hash] = 0, st.st_ino, st.st_size, st.st_mtime, ids
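
One rough edge worth flagging: open() rebuilds loaded entries as lists, so the entry[0] = 0 age reset in file_known_and_unchanged() works for them, but memorize_file_chunks() stores a tuple, and a file memorized and then looked up again within the same run would raise TypeError on that assignment. Storing a list would sidestep this (a hypothetical one-line fix, not part of the commit):

def memorize_file_chunks(self, path_hash, st, ids):
    # Entry: Age, inode, size, mtime, chunk ids
    # a list (not a tuple) keeps the later entry[0] = 0 mutation legal
    self.file_chunks[path_hash] = [0, st.st_ino, st.st_size, st.st_mtime, ids]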