borg/dedupstore/archiver.py

92 lines
2.2 KiB
Python
Raw Normal View History

2010-02-20 17:23:46 +00:00
import os
import sys
import hashlib
import zlib
from repository import Repository
# Maximum number of bytes read from a file, compressed and stored as one chunk.
CHUNKSIZE = 256 * 1024


class FileItem(object):
    """One archived file: its name, its size and the ids of its chunks."""

    def __init__(self):
        # Give every attribute a defined value so an item that has not been
        # processed yet can still be inspected without an AttributeError.
        self.filename = None
        self.size = 0
        self.chunks = []

    def process_file(self, filename, cache):
        """Read *filename* chunk by chunk and hand each chunk to *cache*.

        The file is read in pieces of at most CHUNKSIZE bytes. Each piece is
        zlib-compressed and passed to ``cache.add_chunk``; the value returned
        by that call is appended to ``self.chunks``. ``self.size`` ends up as
        the total number of uncompressed bytes read.

        Any state left over from a previous call is reset first.
        """
        self.filename = filename
        self.size = 0
        self.chunks = []
        # The context manager closes the handle even if reading, compressing
        # or storing a chunk raises.
        with open(filename, 'rb') as fd:
            while True:
                data = fd.read(CHUNKSIZE)
                if not data:
                    break
                self.size += len(data)
                self.chunks.append(cache.add_chunk(zlib.compress(data)))
        # Single parenthesised argument: prints the same text on Python 2 and 3.
        print('%s: %d chunks' % (filename, len(self.chunks)))
class Cache(object):
    """Client-side cache of chunk reference counts.

    Keeps an in-memory map from chunk id (the raw SHA-1 digest of the chunk
    data) to a reference count, and stores each chunk it has not seen yet
    through ``repo.put_file``.
    """

    def __init__(self, repo):
        self.repo = repo
        # chunk id (raw SHA-1 digest) -> reference count
        self.chunkmap = {}

    @staticmethod
    def _hexlify(sha):
        """Return the lowercase hexadecimal text of the raw digest *sha*."""
        # Iterating a bytearray yields ints on both Python 2 and Python 3,
        # so this produces a native str on either version.
        return ''.join('%02x' % byte for byte in bytearray(sha))

    def chunk_filename(self, sha):
        """Return the repository path for the chunk with digest *sha*.

        The hex digest is split as ``chunks/<2 chars>/<2 chars>/<rest>``.
        """
        hexsha = self._hexlify(sha)
        return 'chunks/%s/%s/%s' % (hexsha[:2], hexsha[2:4], hexsha[4:])

    def add_chunk(self, data):
        """Store *data* unless already seen, bump its refcount, return its id.

        The id is the raw SHA-1 digest of *data* exactly as passed in.
        """
        sha = hashlib.sha1(data).digest()
        if not self.seen_chunk(sha):
            self.repo.put_file(self.chunk_filename(sha), data)
        else:
            # One formatted argument keeps the output identical on Python 2
            # and 3 (a two-item print(...) would print a tuple on Python 2).
            print('seen chunk %s' % self._hexlify(sha))
        self.chunk_incref(sha)
        return sha

    def seen_chunk(self, sha):
        """Return True if *sha* currently has a positive reference count."""
        return self.chunkmap.get(sha, 0) > 0

    def chunk_incref(self, sha):
        """Increase the reference count of *sha* by one."""
        self.chunkmap[sha] = self.chunkmap.get(sha, 0) + 1

    def chunk_decref(self, sha):
        """Decrease the reference count of *sha* and return the new count.

        Raises AssertionError if *sha* has no positive reference count. The
        check is an explicit raise rather than an ``assert`` statement so it
        is still enforced when Python runs with ``-O``.
        """
        if self.chunkmap.get(sha, 0) <= 0:
            raise AssertionError(
                'chunk %s is not referenced' % self._hexlify(sha))
        self.chunkmap[sha] -= 1
        return self.chunkmap[sha]
class Archive(object):
    """Container holding the items added to an archive, in insertion order."""

    def __init__(self):
        # Start out with no items.
        self.items = list()

    def add_item(self, item):
        """Append *item* to the end of ``self.items``."""
        self.items += [item]
class Archiver(object):
    """Walk a directory tree and add every file found to an archive."""

    def __init__(self, repo_path='/tmp/repo'):
        """Create the chunk cache and an empty archive.

        repo_path is passed unchanged to ``Repository``; it defaults to the
        location that used to be hard-coded here, so ``Archiver()`` behaves
        as before.
        """
        self.cache = Cache(Repository(repo_path))
        self.archive = Archive()

    def run(self, path):
        """Process every file below *path*, then commit the repository.

        Each file yielded by ``os.walk`` becomes a ``FileItem`` whose chunks
        go through ``self.cache``; the item is then added to
        ``self.archive``. ``commit()`` is called on the cache's repository
        once, after the whole tree has been walked.
        """
        for root, _dirs, files in os.walk(path):
            for name in files:
                filename = os.path.join(root, name)
                item = FileItem()
                item.process_file(filename, self.cache)
                self.archive.add_item(item)
        self.cache.repo.commit()
def main():
    """Command-line entry point: archive the directory named by argv[1].

    Exits with a usage message on stderr when no directory argument is given.
    """
    if len(sys.argv) < 2:
        # Without this check a missing argument surfaces as a bare IndexError.
        sys.exit('usage: %s PATH' % sys.argv[0])
    archiver = Archiver()
    archiver.run(sys.argv[1])


if __name__ == '__main__':
    main()