Switched to an iterator-based Python implementation of chunkify.

This commit is contained in:
Jonas Borgström 2010-02-28 21:34:56 +01:00
parent f6473155c9
commit 41a4842518
1 changed files with 62 additions and 28 deletions

View File

@ -28,12 +28,71 @@ def roll_checksum(sum, remove, add, len):
return (s1 & 0xffff) + ((s2 & 0xffff) << 16)
class ChunkifyIter(object):
    """Iterator that cuts the data read from ``fd`` into chunks.

    A ``chunk_size``-byte window slides over the buffered input one
    position at a time.  Whenever the checksum of the window is found in
    ``chunks`` a chunk boundary is placed at the start of the window: the
    data in front of the window (if any) is returned first, which leaves
    the matching window at the start of the buffer for the next call.  If
    the window has advanced ``chunk_size`` positions without a match, a
    plain ``chunk_size``-sized chunk is returned.  Once the input is
    exhausted the last ``chunk_size`` items form the final chunk, preceded
    by whatever is left in front of them; if no more than ``chunk_size``
    items remain they are returned as a single chunk.

    Parameters:
        fd: object with a ``read(n)`` method; an empty result is treated
            as end of input.
        chunk_size: size of the sliding window and of regular chunks.
        chunks: container of known window checksums, queried with ``in``.

    Iteration state is (re)initialised by ``__iter__``, so ``next`` must
    not be called before ``iter()`` has been applied to the object.
    """

    def __init__(self, fd, chunk_size, chunks):
        self.fd = fd
        self.chunk_size = chunk_size
        self.chunks = chunks

    def __iter__(self):
        self.data = ''          # buffered input that has not been returned yet
        self.i = 0              # offset of the sliding window inside self.data
        self.full_sum = True    # next checksum must be computed from scratch
        self.extra = None       # final chunk held back for the last call
        self.done = False
        return self

    def next(self):
        """Return the next chunk; raise StopIteration when input is exhausted."""
        if self.done:
            raise StopIteration
        if self.extra:
            # The EOF branch below already returned the leading part.
            self.done = True
            return self.extra
        while True:
            if len(self.data) - self.i < self.chunk_size:
                # Not enough buffered data for a full window: refill.
                self.data += self.fd.read(self.chunk_size * 3)
                if not self.data:
                    raise StopIteration
            if len(self.data) - self.i < self.chunk_size:
                # Still short after reading: end of input.  This is checked
                # before touching the checksum because the sum is not needed
                # here and self.data[self.i] may be out of range
                # (e.g. chunk_size == 1).
                if len(self.data) > self.chunk_size:
                    self.extra = self.data[-self.chunk_size:]
                    return self.data[:len(self.data) - self.chunk_size]
                self.done = True
                return self.data
            if self.full_sum:
                self.sum = checksum(self.data[self.i:self.i + self.chunk_size])
                self.full_sum = False
            else:
                # The window moved one position: drop the item that left it
                # and add the one that entered it.
                self.sum = roll_checksum(self.sum, self.remove,
                                         self.data[self.i + self.chunk_size - 1],
                                         self.chunk_size)
            # First item of the current window; it leaves on the next roll.
            self.remove = self.data[self.i]
            if self.sum in self.chunks:
                if self.i > 0:
                    # Return the data in front of the known chunk first; the
                    # known chunk itself is then at the start of the buffer.
                    chunk = self.data[:self.i]
                    self.data = self.data[self.i:]
                else:
                    chunk = self.data[:self.chunk_size]
                    self.data = self.data[self.chunk_size:]
                self.full_sum = True
                self.i = 0
                return chunk
            elif self.i == self.chunk_size:
                chunk = self.data[:self.chunk_size]
                self.data = self.data[self.chunk_size:]
                self.i = 0
                # The buffer was shifted, so the held sum/remove pair no longer
                # describes "the window before position i".  Rolling it would
                # corrupt every following checksum; recompute instead.
                self.full_sum = True
                return chunk
            else:
                self.i += 1
def chunkify(fd, chunk_size, chunks):
"""
>>> fd = StringIO.StringIO('ABCDEFGHIJKLMN')
>>> list(chunkify(fd, 4, {}))
['ABCD', 'EFGH', 'IJ', 'KLMN']
>>> fd = StringIO.StringIO('ABCDEFGHIJKLMN')
>>> chunks = {44564754: True} # 'BCDE'
>>> list(chunkify(fd, 4, chunks))
@ -49,33 +108,8 @@ def chunkify(fd, chunk_size, chunks):
>>> list(chunkify(fd, 4, chunks))
['ABCD', 'EFGH', 'IJ', 'KLMN']
"""
data = 'X' + fd.read(chunk_size * 3)
i = 1
sum = checksum(data[:chunk_size])
while True:
if len(data) - i <= chunk_size * 2:
data += fd.read(chunk_size * 2)
if i == chunk_size + 1:
yield data[1:chunk_size + 1]
i = 1
data = data[chunk_size:]
if len(data) - i <= chunk_size: # EOF?
if len(data) > chunk_size + 1:
yield data[1:len(data) - chunk_size]
yield data[-chunk_size:]
else:
yield data[1:]
return
sum = roll_checksum(sum, data[i - 1], data[i - 1 + chunk_size], chunk_size)
#print data[i:i + chunk_size], sum
if chunks.get(sum):
if i > 1:
yield data[1:i]
yield data[i:i + chunk_size]
data = data[i + chunk_size - 1:]
i = 0
sum = checksum(data[:chunk_size])
i += 1
return ChunkifyIter(fd, chunk_size, chunks)
if __name__ == '__main__':
import StringIO