mirror of
https://github.com/borgbackup/borg.git
synced 2025-02-07 06:59:38 +00:00
Started adding support for using variable-length chunks.
This commit is contained in:
parent
e4807b11c9
commit
bc959ebb37
2 changed files with 110 additions and 10 deletions
|
@ -5,9 +5,11 @@
|
||||||
import cPickle
|
import cPickle
|
||||||
from optparse import OptionParser
|
from optparse import OptionParser
|
||||||
|
|
||||||
|
from chunker import chunker, checksum
|
||||||
from store import Store
|
from store import Store
|
||||||
|
|
||||||
CHUNKSIZE = 256 * 1024
|
|
||||||
|
CHUNKSIZE = 64 * 1024
|
||||||
NS_ARCHIVES = 'ARCHIVES'
|
NS_ARCHIVES = 'ARCHIVES'
|
||||||
NS_CHUNKS = 'CHUNKS'
|
NS_CHUNKS = 'CHUNKS'
|
||||||
|
|
||||||
|
@ -155,21 +157,34 @@ def verify_archive(self, archive_name):
|
||||||
else:
|
else:
|
||||||
print 'OK'
|
print 'OK'
|
||||||
|
|
||||||
|
def extract_archive(self, archive_name):
    """Restore every item of the archive *archive_name* below the current directory.

    The archive record is fetched from the NS_ARCHIVES namespace of
    ``self.store``, decompressed and unpickled.  For each entry of its
    ``'items'`` list the path is printed; ``'DIR'`` items are created with
    ``os.makedirs`` when missing and ``'FILE'`` items are written by
    concatenating their decompressed chunks fetched from NS_CHUNKS.

    Raises ``Exception`` when the archive does not exist or when an item
    path starts with ``/``, ``\\`` or ``:``.
    """
    try:
        # NOTE(review): unpickling runs arbitrary code if the stored blob was
        # crafted by someone else -- only safe while the store is trusted.
        archive = cPickle.loads(zlib.decompress(self.store.get(NS_ARCHIVES, archive_name)))
    except Store.DoesNotExist:
        raise Exception('Archive "%s" does not exist' % archive_name)
    for item in archive['items']:
        path = item['path']
        # Explicit check instead of ``assert``: asserts vanish under ``-O``,
        # and slicing (``path[:1]``) does not blow up on an empty path.
        if path[:1] in ('/', '\\', ':'):
            raise Exception('Refusing to extract unsafe path "%s"' % path)
        # NOTE(review): '..' components are not rejected here -- confirm
        # whether archives are allowed to contain them.
        print(path)
        if item['type'] == 'DIR':
            # An empty path is what stripping the leading separators leaves
            # of a root directory; there is nothing to create for it.
            if path and not os.path.exists(path):
                os.makedirs(path)
        elif item['type'] == 'FILE':
            with open(path, 'wb') as fd:
                for chunk in item['chunks']:
                    fd.write(zlib.decompress(self.store.get(NS_CHUNKS, chunk)))
|
||||||
|
|
||||||
def process_dir(self, path, cache):
    """Build the archive item describing the directory *path*.

    Leading ``/``, ``\\`` and ``:`` characters are stripped from the path,
    the result is reported on stdout and returned in a ``'DIR'`` item.
    *cache* is accepted but not used.
    """
    relative = path.lstrip('/\\:')
    print('Directory: %s' % (relative))
    return dict(type='DIR', path=relative)
|
||||||
|
|
||||||
def process_file(self, path, cache):
    """Chunk, compress and register the file *path*; return its archive item.

    The file is read in binary mode through ``chunker`` (with an empty
    known-chunk table), every piece is zlib-compressed and handed to
    ``cache.add_chunk``, whose return values are collected in order.
    The returned ``'FILE'`` item carries the path with leading ``/``,
    ``\\`` and ``:`` stripped, the total uncompressed size and the
    collected chunk references.
    """
    chunk_ids = []
    total = 0
    with open(path, 'rb') as fd:
        for piece in chunker(fd, CHUNKSIZE, {}):
            total += len(piece)
            chunk_ids.append(cache.add_chunk(zlib.compress(piece)))
    stored_path = path.lstrip('/\\:')
    print('File: %s (%d chunks)' % (stored_path, len(chunk_ids)))
    return {'type': 'FILE', 'path': stored_path, 'size': total, 'chunks': chunk_ids}
|
||||||
|
@ -208,6 +223,8 @@ def run(self):
|
||||||
self.list_archive(options.list_archive)
|
self.list_archive(options.list_archive)
|
||||||
elif options.verify_archive:
|
elif options.verify_archive:
|
||||||
self.verify_archive(options.verify_archive)
|
self.verify_archive(options.verify_archive)
|
||||||
|
elif options.extract_archive:
|
||||||
|
self.extract_archive(options.extract_archive)
|
||||||
elif options.delete_archive:
|
elif options.delete_archive:
|
||||||
self.delete_archive(options.delete_archive)
|
self.delete_archive(options.delete_archive)
|
||||||
else:
|
else:
|
||||||
|
|
83
dedupstore/chunker.py
Normal file
83
dedupstore/chunker.py
Normal file
|
@ -0,0 +1,83 @@
|
||||||
|
def checksum(data, sum=0):
    """Simple but fast checksum that can be updated at either end.

    Two 16-bit running totals are packed into one integer: the low half
    accumulates ``ord(c) + 1`` for every character, the high half
    accumulates the successive values of the low half.  Passing a previous
    result as *sum* continues that checksum with more data.

    >>> checksum('FOOBAR')
    102367679
    >>> checksum('FOOBAR') == checksum('BAR', checksum('FOO'))
    True
    """
    # Split the packed value: high 16+ bits / low 16 bits.
    high, low = divmod(sum, 0x10000)
    for char in data:
        low += ord(char) + 1
        high += low
    return ((high & 0xffff) << 16) | (low & 0xffff)
|
||||||
|
|
||||||
|
|
||||||
|
def roll_checksum(sum, remove, add, len):
    """Slide a ``checksum`` window of *len* characters forward by one.

    *sum* is the checksum of the current window, *remove* the character
    leaving at the front and *add* the character entering at the back.

    >>> roll_checksum(checksum('XFOOBA'), 'X', 'R', 6) == checksum('FOOBAR')
    True
    """
    low = sum & 0xffff
    high = sum >> 16
    outgoing = ord(remove)
    incoming = ord(add)
    # The low total just swaps one character for the other.
    low = low - outgoing + incoming
    # The outgoing character was counted *len* times in the high total;
    # the updated low total is added once for the new window position.
    high = high - len * (outgoing + 1) + low
    return ((high & 0xffff) << 16) + (low & 0xffff)
|
||||||
|
|
||||||
|
|
||||||
|
def chunker(fd, chunk_size, chunks):
    """Yield the contents of *fd* as consecutive pieces of at most *chunk_size*.

    A window of *chunk_size* characters is slid over the data one character
    at a time.  Whenever the rolling checksum of the window is a key of
    *chunks*, the characters pending before the window (if any) are yielded
    as a short piece and the window itself is yielded as a piece of its own.
    Without a match a piece is cut every *chunk_size* characters.  At end of
    input the last *chunk_size* characters always form the final piece,
    preceded by a short piece holding whatever was pending before them.
    Matching is by checksum membership only; contents are not compared.

    Empty input yields nothing.

    :param fd: object with a ``read(n)`` method returning a string
    :param chunk_size: window / maximum piece length
    :param chunks: container of checksums of already known chunks

    >>> fd = StringIO.StringIO('ABCDEFGHIJKLMN')
    >>> list(chunker(fd, 4, {}))
    ['ABCD', 'EFGH', 'IJ', 'KLMN']

    >>> fd = StringIO.StringIO('ABCDEFGHIJKLMN')
    >>> chunks = {44564754: True} # 'BCDE'
    >>> list(chunker(fd, 4, chunks))
    ['A', 'BCDE', 'FGHI', 'J', 'KLMN']

    >>> fd = StringIO.StringIO('ABCDEFGHIJKLMN')
    >>> chunks = {44564754: True, 48496938: True} # 'BCDE', 'HIJK'
    >>> list(chunker(fd, 4, chunks))
    ['A', 'BCDE', 'FG', 'HIJK', 'LMN']

    >>> fd = StringIO.StringIO('ABCDEFGHIJKLMN')
    >>> chunks = {43909390: True, 50463030: True} # 'ABCD', 'KLMN'
    >>> list(chunker(fd, 4, chunks))
    ['ABCD', 'EFGH', 'IJ', 'KLMN']

    >>> list(chunker(StringIO.StringIO(''), 4, {}))
    []
    """
    head = fd.read(chunk_size * 2)
    if not head:
        # Nothing to chunk: do not emit a spurious empty piece.
        return
    # data[0] is a placeholder for "the character before the pending data";
    # it is rolled out of the checksum on the first iteration and is never
    # part of a yielded piece.
    data = 'X' + head
    pos = 1
    # Invariant at the top of the loop: window_sum covers
    # data[pos - 1:pos - 1 + chunk_size]; rolling it moves the window to pos.
    window_sum = checksum(data[:chunk_size])
    while True:
        # Top up the buffer before the window can run past its end.
        if len(data) - pos - 2 <= chunk_size:
            data += fd.read(chunk_size * 2)
        # A full chunk went by without a match: cut a fixed-size piece and
        # keep its last character as the new data[0].
        if pos == chunk_size + 1:
            yield data[1:chunk_size + 1]
            pos = 1
            data = data[chunk_size:]
        if len(data) - pos <= chunk_size:  # EOF?
            if len(data) > chunk_size + 1:
                yield data[1:len(data) - chunk_size]
                yield data[-chunk_size:]
            else:
                yield data[1:]
            return
        window_sum = roll_checksum(window_sum, data[pos - 1],
                                   data[pos - 1 + chunk_size], chunk_size)
        if window_sum in chunks:
            if pos > 1:
                yield data[1:pos]
            yield data[pos:pos + chunk_size]
            # Restart right after the matched window, keeping its last
            # character as data[0]; pos becomes 1 again via the increment.
            data = data[pos + chunk_size - 1:]
            pos = 0
            window_sum = checksum(data[:chunk_size])
        pos += 1
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Self-test entry point: run the doctests embedded in this module.
    # StringIO is imported into the module namespace here because the
    # doctest examples call StringIO.StringIO.
    import doctest
    import StringIO
    doctest.testmod()
|
Loading…
Reference in a new issue