Renamed chunker to chunkifier.

This commit is contained in:
Jonas Borgström 2010-02-28 16:20:19 +01:00
parent 0cca830981
commit 6c73f5dc86
2 changed files with 18 additions and 15 deletions

View File

@ -6,7 +6,7 @@ import struct
import cPickle
from optparse import OptionParser
from chunker import chunker, checksum
from chunkifier import chunkify, checksum
from store import Store
@ -70,12 +70,12 @@ class Cache(object):
def add_chunk(self, data):
sum = checksum(data)
data = zlib.compress(data)
#print 'chunk %d: %d' % (len(data), sum)
hash = struct.pack('I', sum) + hashlib.sha1(data).digest()
if not self.seen_chunk(hash):
zdata = zlib.compress(data)
size = len(zdata)
self.store.put(NS_CHUNKS, hash, zdata)
id = struct.pack('I', sum) + hashlib.sha1(data).digest()
if not self.seen_chunk(id):
size = len(data)
self.store.put(NS_CHUNKS, id, data)
else:
size = 0
#print 'seen chunk', id.encode('hex')
@ -164,7 +164,7 @@ class Archiver(object):
print item['path'], '...',
for chunk in item['chunks']:
data = self.store.get(NS_CHUNKS, chunk)
if hashlib.sha1(data).digest() != chunk:
if hashlib.sha1(data).digest() != chunk[4:]:
print 'ERROR'
break
else:
@ -184,7 +184,10 @@ class Archiver(object):
if item['type'] == 'FILE':
with open(item['path'], 'wb') as fd:
for chunk in item['chunks']:
fd.write(zlib.decompress(self.store.get(NS_CHUNKS, chunk)))
data = self.store.get(NS_CHUNKS, chunk)
if hashlib.sha1(data).digest() != chunk[4:]:
raise Exception('Invalid chunk checksum')
fd.write(zlib.decompress(data))
def process_dir(self, path, cache):
path = path.lstrip('/\\:')
@ -198,7 +201,7 @@ class Archiver(object):
origsize = 0
compsize = 0
chunks = []
for chunk in chunker(fd, CHUNKSIZE, self.cache.summap):
for chunk in chunkify(fd, CHUNKSIZE, self.cache.summap):
origsize += len(chunk)
id, size = cache.add_chunk(chunk)
compsize += size

View File

@ -28,25 +28,25 @@ def roll_checksum(sum, remove, add, len):
return (s1 & 0xffff) + ((s2 & 0xffff) << 16)
def chunker(fd, chunk_size, chunks):
def chunkify(fd, chunk_size, chunks):
"""
>>> fd = StringIO.StringIO('ABCDEFGHIJKLMN')
>>> list(chunker(fd, 4, {}))
>>> list(chunkify(fd, 4, {}))
['ABCD', 'EFGH', 'IJ', 'KLMN']
>>> fd = StringIO.StringIO('ABCDEFGHIJKLMN')
>>> chunks = {44564754: True} # 'BCDE'
>>> list(chunker(fd, 4, chunks))
>>> list(chunkify(fd, 4, chunks))
['A', 'BCDE', 'FGHI', 'J', 'KLMN']
>>> fd = StringIO.StringIO('ABCDEFGHIJKLMN')
>>> chunks = {44564754: True, 48496938: True} # 'BCDE', 'HIJK'
>>> list(chunker(fd, 4, chunks))
>>> list(chunkify(fd, 4, chunks))
['A', 'BCDE', 'FG', 'HIJK', 'LMN']
>>> fd = StringIO.StringIO('ABCDEFGHIJKLMN')
>>> chunks = {43909390: True, 50463030: True} # 'ABCD', 'KLMN'
>>> list(chunker(fd, 4, chunks))
>>> list(chunkify(fd, 4, chunks))
['ABCD', 'EFGH', 'IJ', 'KLMN']
"""
data = 'X' + fd.read(chunk_size * 3)
@ -62,7 +62,7 @@ def chunker(fd, chunk_size, chunks):
if len(data) - i <= chunk_size: # EOF?
if len(data) > chunk_size + 1:
yield data[1:len(data) - chunk_size]
yield data[:chunk_size]
yield data[-chunk_size:]
else:
yield data[1:]
return