mirror of
https://github.com/borgbackup/borg.git
synced 2025-01-31 03:31:41 +00:00
Some chunker fixes.
This commit is contained in:
parent
9f955bf9fb
commit
0cca830981
2 changed files with 19 additions and 11 deletions
|
@ -73,11 +73,14 @@ def add_chunk(self, data):
|
|||
#print 'chunk %d: %d' % (len(data), sum)
|
||||
hash = struct.pack('I', sum) + hashlib.sha1(data).digest()
|
||||
if not self.seen_chunk(hash):
|
||||
self.store.put(NS_CHUNKS, hash, zlib.compress(data))
|
||||
zdata = zlib.compress(data)
|
||||
size = len(zdata)
|
||||
self.store.put(NS_CHUNKS, hash, zdata)
|
||||
else:
|
||||
print 'seen chunk', hash.encode('hex')
|
||||
size = 0
|
||||
#print 'seen chunk', hash.encode('hex')
|
||||
self.chunk_incref(hash)
|
||||
return hash
|
||||
return hash, size
|
||||
|
||||
def seen_chunk(self, hash):
    """Tell whether `hash` has an entry greater than zero in self.chunkmap.

    A hash that is missing from the map is treated as zero, so the
    result is False for it.
    """
    count = self.chunkmap.get(hash, 0)
    return count > 0
|
||||
|
@ -189,14 +192,20 @@ def process_dir(self, path, cache):
|
|||
return {'type': 'DIR', 'path': path}
|
||||
|
||||
def process_file(self, path, cache):
|
||||
print 'Adding: %s...' % path,
|
||||
sys.stdout.flush()
|
||||
with open(path, 'rb') as fd:
|
||||
size = 0
|
||||
origsize = 0
|
||||
compsize = 0
|
||||
chunks = []
|
||||
for chunk in chunker(fd, CHUNKSIZE, self.cache.summap):
|
||||
size += len(chunk)
|
||||
chunks.append(cache.add_chunk(chunk))
|
||||
origsize += len(chunk)
|
||||
id, size = cache.add_chunk(chunk)
|
||||
compsize += size
|
||||
chunks.append(id)
|
||||
path = path.lstrip('/\\:')
|
||||
print 'File: %s (%d chunks)' % (path, len(chunks))
|
||||
ratio = origsize and compsize * 100 / origsize or 0
|
||||
print '(%d chunks: %d%%)' % (len(chunks), ratio)
|
||||
return {'type': 'FILE', 'path': path, 'size': size, 'chunks': chunks}
|
||||
|
||||
def run(self):
|
||||
|
|
|
@ -49,11 +49,11 @@ def chunker(fd, chunk_size, chunks):
|
|||
>>> list(chunker(fd, 4, chunks))
|
||||
['ABCD', 'EFGH', 'IJ', 'KLMN']
|
||||
"""
|
||||
data = 'X' + fd.read(chunk_size * 2)
|
||||
data = 'X' + fd.read(chunk_size * 3)
|
||||
i = 1
|
||||
sum = checksum(data[:chunk_size])
|
||||
while True:
|
||||
if len(data) - i - 2 <= chunk_size:
|
||||
if len(data) - i <= chunk_size * 2:
|
||||
data += fd.read(chunk_size * 2)
|
||||
if i == chunk_size + 1:
|
||||
yield data[1:chunk_size + 1]
|
||||
|
@ -62,14 +62,13 @@ def chunker(fd, chunk_size, chunks):
|
|||
if len(data) - i <= chunk_size: # EOF?
|
||||
if len(data) > chunk_size + 1:
|
||||
yield data[1:len(data) - chunk_size]
|
||||
yield data[-chunk_size:]
|
||||
yield data[:chunk_size]
|
||||
else:
|
||||
yield data[1:]
|
||||
return
|
||||
sum = roll_checksum(sum, data[i - 1], data[i - 1 + chunk_size], chunk_size)
|
||||
#print data[i:i + chunk_size], sum
|
||||
if chunks.get(sum):
|
||||
print 'Woot', i
|
||||
if i > 1:
|
||||
yield data[1:i]
|
||||
yield data[i:i + chunk_size]
|
||||
|
|
Loading…
Reference in a new issue