# Mirror of https://github.com/borgbackup/borg.git
# Original listing: 92 lines, 2.2 KiB, Python
import os
|
|
import sys
|
|
import hashlib
|
|
import zlib
|
|
from repository import Repository
|
|
|
|
# Number of bytes requested per read when a file is split into chunks
# (256 KiB); FileItem.process_file passes it to fd.read().
CHUNKSIZE = 256 * 1024
|
|
|
|
class FileItem(object):
    """A single file, split into zlib-compressed chunks handed to a cache.

    Attributes:
        filename: path given to the last process_file call (None before).
        size: number of uncompressed bytes read from that file.
        chunks: the values returned by ``cache.add_chunk`` for each chunk,
            in file order.
    """

    def __init__(self):
        # Define every attribute up front so an item that has not been
        # processed yet can still be inspected without AttributeError.
        self.filename = None
        self.size = 0
        self.chunks = []

    def process_file(self, filename, cache, chunksize=None):
        """Read ``filename`` in chunks and register each chunk with ``cache``.

        Every chunk is compressed with zlib before it is passed to
        ``cache.add_chunk``; the value returned by that call is appended to
        ``self.chunks``. A summary line is printed once the file is done.

        Args:
            filename: path of the file to read (opened in binary mode).
            cache: object providing ``add_chunk(data)``.
            chunksize: maximum number of bytes per read; defaults to the
                module-level CHUNKSIZE when omitted or None.

        Raises:
            IOError/OSError: if the file cannot be opened or read.
        """
        if chunksize is None:
            chunksize = CHUNKSIZE
        self.filename = filename
        # The context manager guarantees the descriptor is released even if
        # reading, compressing or storing a chunk raises.
        with open(filename, 'rb') as fd:
            self.size = 0
            self.chunks = []
            while True:
                data = fd.read(chunksize)
                if not data:
                    # An empty read means end of file.
                    break
                self.size += len(data)
                self.chunks.append(cache.add_chunk(zlib.compress(data)))
        # Single-argument form prints the same text on Python 2 and 3.
        print('%s: %d chunks' % (filename, len(self.chunks)))
|
|
|
|
|
|
class Cache(object):
    """Client-side cache of chunk reference counts in front of a repository.

    Chunks are identified by the raw SHA-1 digest of the data passed to
    ``add_chunk``. ``chunkmap`` maps each digest to its reference count; a
    chunk counts as "seen" while that count is greater than zero.
    """

    def __init__(self, repo):
        """Create an empty cache that stores new chunks via ``repo.put_file``."""
        self.repo = repo
        self.chunkmap = {}

    @staticmethod
    def _hexlify(sha):
        """Return the lowercase hexadecimal text form of the raw digest ``sha``."""
        # bytearray iterates as integers on both Python 2 and Python 3, so
        # this produces a native str on either, unlike the Python-2-only
        # 'hex' codec.
        return ''.join('%02x' % byte for byte in bytearray(sha))

    def chunk_filename(self, sha):
        """Return the repository path for the chunk with raw digest ``sha``.

        The hex digest is split as ``chunks/<first 2>/<next 2>/<rest>``.
        """
        digest = self._hexlify(sha)
        return 'chunks/%s/%s/%s' % (digest[:2], digest[2:4], digest[4:])

    def add_chunk(self, data):
        """Register ``data`` as a chunk and return its raw SHA-1 digest.

        The data is written through ``repo.put_file`` only when the chunk is
        not currently seen; otherwise a notice is printed. In both cases the
        chunk's reference count is incremented.
        """
        sha = hashlib.sha1(data).digest()
        if not self.seen_chunk(sha):
            self.repo.put_file(self.chunk_filename(sha), data)
        else:
            # Single-argument form prints the same text on Python 2 and 3.
            print('seen chunk %s' % self._hexlify(sha))
        self.chunk_incref(sha)
        return sha

    def seen_chunk(self, sha):
        """Return True if ``sha`` has a reference count above zero."""
        return self.chunkmap.get(sha, 0) > 0

    def chunk_incref(self, sha):
        """Increment the reference count of ``sha``, starting from zero."""
        self.chunkmap[sha] = self.chunkmap.get(sha, 0) + 1

    def chunk_decref(self, sha):
        """Decrement the reference count of ``sha`` and return the new count.

        Raises:
            AssertionError: if ``sha`` has no outstanding references (only
                checked when assertions are enabled).
        """
        assert self.chunkmap.get(sha, 0) > 0
        self.chunkmap[sha] -= 1
        return self.chunkmap[sha]
|
|
|
|
|
|
class Archive(object):
    """Container that accumulates items, in insertion order, in ``items``."""

    def __init__(self):
        """Start with an empty ``items`` list."""
        self.items = list()

    def add_item(self, item):
        """Append ``item`` to the end of ``items``."""
        collected = self.items
        collected.append(item)
|
|
|
|
|
|
class Archiver(object):
    """Walk a directory tree and record every file in an Archive.

    Each file is processed by a FileItem that shares this archiver's Cache;
    the cache wraps a Repository opened at '/tmp/repo'.
    """

    def __init__(self):
        """Open the repository, wrap it in a Cache and start an empty Archive."""
        repository = Repository('/tmp/repo')
        self.cache = Cache(repository)
        self.archive = Archive()

    def run(self, path):
        """Process every file found below ``path``, then commit the repository."""
        for dirpath, _subdirs, names in os.walk(path):
            for name in names:
                entry = FileItem()
                entry.process_file(os.path.join(dirpath, name), self.cache)
                self.archive.add_item(entry)
        # Commit once, after the whole tree has been processed.
        self.cache.repo.commit()
|
|
|
|
|
|
def main():
    """Archive the directory tree named by the first command-line argument.

    Exits with a usage message (via SystemExit) when no path argument was
    given, instead of failing with an IndexError on ``sys.argv[1]``.
    """
    if len(sys.argv) < 2:
        # sys.argv can be empty when the interpreter is embedded.
        prog = sys.argv[0] if sys.argv else 'archiver'
        sys.exit('usage: %s PATH' % prog)
    archiver = Archiver()
    archiver.run(sys.argv[1])


if __name__ == '__main__':
    main()