mirror of
https://github.com/borgbackup/borg.git
synced 2025-01-30 19:21:17 +00:00
Switched to an iterator-based Python implementation of chunkify.
This commit is contained in:
parent
f6473155c9
commit
41a4842518
1 changed files with 62 additions and 28 deletions
|
@ -28,12 +28,71 @@ def roll_checksum(sum, remove, add, len):
|
|||
return (s1 & 0xffff) + ((s2 & 0xffff) << 16)
|
||||
|
||||
|
||||
class ChunkifyIter(object):
    """Iterator that cuts the data read from ``fd`` into chunks.

    A window of ``chunk_size`` characters slides over the buffered data one
    position at a time.  Whenever the checksum of the window is a key of
    ``chunks``, the data in front of the window (if any) is returned as one
    chunk and the window itself as the next one.  If the window has moved
    ``chunk_size`` positions without a hit, the first ``chunk_size``
    characters are returned as a chunk.  At end of input the remaining data
    is returned, split so that the final chunk holds the last ``chunk_size``
    characters whenever more than ``chunk_size`` characters are left.

    :param fd: object with a ``read(size)`` method returning a string
    :param chunk_size: width of the checksum window / regular chunk length
    :param chunks: container of known window checksums (tested with ``in``)
    """

    def __init__(self, fd, chunk_size, chunks):
        self.fd = fd
        self.chunk_size = chunk_size
        self.chunks = chunks
        # Initialise the scan state right away so next() also works on an
        # instance that was never passed through iter().
        self._reset()

    def _reset(self):
        """Put the scanner into its initial state with nothing buffered."""
        self.data = ''
        self.i = 0              # start of the checksum window inside data
        self.full_sum = True    # next checksum must be computed from scratch
        self.extra = None       # final chunk held back by the EOF handling
        self.done = False

    def __iter__(self):
        self._reset()
        return self

    def _emit(self, size):
        """Cut the first ``size`` buffered characters off and return them."""
        chunk = self.data[:size]
        self.data = self.data[size:]
        self.i = 0
        # The buffer was shifted, so the rolling state (sum/remove) no longer
        # lines up with position 0; force a fresh checksum on the next pass.
        self.full_sum = True
        return chunk

    def next(self):
        """Return the next chunk; raise StopIteration when input is used up."""
        if self.done:
            raise StopIteration
        if self.extra:
            self.done = True
            return self.extra
        while True:
            if len(self.data) - self.i < self.chunk_size:
                self.data += self.fd.read(self.chunk_size * 3)
                if not self.data:
                    raise StopIteration
                if len(self.data) - self.i < self.chunk_size:
                    # Still no full window after reading: treat as EOF.
                    # Handled before any checksum work so that an empty
                    # window is never indexed.
                    if len(self.data) > self.chunk_size:
                        self.extra = self.data[-self.chunk_size:]
                        return self.data[:len(self.data) - self.chunk_size]
                    self.done = True
                    return self.data
            if self.full_sum:
                self.sum = checksum(self.data[self.i:self.i + self.chunk_size])
                self.full_sum = False
            else:
                self.sum = roll_checksum(self.sum, self.remove,
                                         self.data[self.i + self.chunk_size - 1],
                                         self.chunk_size)
            # First character of the current window; it is the one that
            # leaves the window on the next roll.
            self.remove = self.data[self.i]
            if self.sum in self.chunks:
                # Known chunk at the window: first hand out whatever precedes
                # it; the window itself matches again at i == 0 next call.
                return self._emit(self.i if self.i > 0 else self.chunk_size)
            if self.i == self.chunk_size:
                return self._emit(self.chunk_size)
            self.i += 1

    # Python 3 spelling of the iterator protocol.
    __next__ = next
|
||||
|
||||
|
||||
def chunkify(fd, chunk_size, chunks):
|
||||
"""
|
||||
>>> fd = StringIO.StringIO('ABCDEFGHIJKLMN')
|
||||
>>> list(chunkify(fd, 4, {}))
|
||||
['ABCD', 'EFGH', 'IJ', 'KLMN']
|
||||
|
||||
|
||||
>>> fd = StringIO.StringIO('ABCDEFGHIJKLMN')
|
||||
>>> chunks = {44564754: True} # 'BCDE'
|
||||
>>> list(chunkify(fd, 4, chunks))
|
||||
|
@ -49,33 +108,8 @@ def chunkify(fd, chunk_size, chunks):
|
|||
>>> list(chunkify(fd, 4, chunks))
|
||||
['ABCD', 'EFGH', 'IJ', 'KLMN']
|
||||
"""
|
||||
data = 'X' + fd.read(chunk_size * 3)
|
||||
i = 1
|
||||
sum = checksum(data[:chunk_size])
|
||||
while True:
|
||||
if len(data) - i <= chunk_size * 2:
|
||||
data += fd.read(chunk_size * 2)
|
||||
if i == chunk_size + 1:
|
||||
yield data[1:chunk_size + 1]
|
||||
i = 1
|
||||
data = data[chunk_size:]
|
||||
if len(data) - i <= chunk_size: # EOF?
|
||||
if len(data) > chunk_size + 1:
|
||||
yield data[1:len(data) - chunk_size]
|
||||
yield data[-chunk_size:]
|
||||
else:
|
||||
yield data[1:]
|
||||
return
|
||||
sum = roll_checksum(sum, data[i - 1], data[i - 1 + chunk_size], chunk_size)
|
||||
#print data[i:i + chunk_size], sum
|
||||
if chunks.get(sum):
|
||||
if i > 1:
|
||||
yield data[1:i]
|
||||
yield data[i:i + chunk_size]
|
||||
data = data[i + chunk_size - 1:]
|
||||
i = 0
|
||||
sum = checksum(data[:chunk_size])
|
||||
i += 1
|
||||
return ChunkifyIter(fd, chunk_size, chunks)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
import StringIO
|
||||
|
|
Loading…
Reference in a new issue