Mirror of https://github.com/borgbackup/borg.git

Code re-organization

Jonas Borgström 2010-10-20 19:59:15 +02:00
parent 0839e6bed3
commit 32c815800e
3 changed files with 213 additions and 208 deletions
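The reorganization extracts the Archive class, previously defined inline in archiver.py, into a new dedupestore/archive.py module; archiver.py keeps only the Archiver command-line driver and now imports Archive from the new module, and cache.py merely has an import reordered.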

dedupestore/archive.py (new file, 207 lines)

@@ -0,0 +1,207 @@
from datetime import datetime
import hashlib
import logging
import msgpack
import os
import stat
import zlib

from .cache import NS_ARCHIVES, NS_CHUNKS
from .chunkifier import chunkify
from .helpers import uid2user, user2uid, gid2group, group2gid

CHUNK_SIZE = 55001


class Archive(object):

    def __init__(self, store, cache, name=None):
        self.store = store
        self.cache = cache
        self.items = []
        self.chunks = []
        self.chunk_idx = {}
        if name:
            self.open(name)

    def open(self, name):
        id = self.cache.archives[name]
        data = self.store.get(NS_ARCHIVES, id)
        if hashlib.sha256(data).digest() != id:
            raise Exception('Archive hash did not match')
        archive = msgpack.unpackb(zlib.decompress(data))
        self.items = archive['items']
        self.name = archive['name']
        self.chunks = archive['chunks']
        for i, chunk in enumerate(archive['chunks']):
            self.chunk_idx[i] = chunk[0]

    def save(self, name):
        archive = {
            'name': name,
            'ts': datetime.utcnow().isoformat(),
            'items': self.items,
            'chunks': self.chunks
        }
        data = zlib.compress(msgpack.packb(archive))
        self.id = hashlib.sha256(data).digest()
        self.store.put(NS_ARCHIVES, self.id, data)
        self.store.commit()

    def add_chunk(self, id, size):
        try:
            return self.chunk_idx[id]
        except KeyError:
            idx = len(self.chunks)
            self.chunks.append((id, size))
            self.chunk_idx[id] = idx
            return idx

    def stats(self, cache):
        total_osize = 0
        total_csize = 0
        total_usize = 0
        chunk_count = {}
        for item in self.items:
            if item['type'] == 'FILE':
                total_osize += item['size']
                for idx in item['chunks']:
                    id = self.chunk_idx[idx]
                    chunk_count.setdefault(id, 0)
                    chunk_count[id] += 1
        for id, c in chunk_count.items():
            count, size = cache.chunkmap[id]
            total_csize += size
            if c == count:
                total_usize += size
        return dict(osize=total_osize, csize=total_csize, usize=total_usize)

    def list(self):
        for item in self.items:
            print item['path']

    def extract(self, dest=None):
        dest = dest or os.getcwdu()
        for item in self.items:
            assert item['path'][0] not in ('/', '\\', ':')
            path = os.path.join(dest, item['path'].decode('utf-8'))
            if item['type'] == 'DIRECTORY':
                logging.info(path)
                if not os.path.exists(path):
                    os.makedirs(path)
            elif item['type'] == 'SYMLINK':
                logging.info('%s => %s', path, item['source'])
                if not os.path.exists(os.path.dirname(path)):
                    os.makedirs(os.path.dirname(path))
                os.symlink(item['source'], path)
            elif item['type'] == 'FILE':
                logging.info(path)
                if not os.path.exists(os.path.dirname(path)):
                    os.makedirs(os.path.dirname(path))
                with open(path, 'wb') as fd:
                    for chunk in item['chunks']:
                        id = self.chunk_idx[chunk]
                        data = self.store.get(NS_CHUNKS, id)
                        cid = data[:32]
                        data = data[32:]
                        if hashlib.sha256(data).digest() != cid:
                            raise Exception('Invalid chunk checksum')
                        data = zlib.decompress(data)
                        fd.write(data)
                os.chmod(path, item['mode'])
                uid = user2uid(item['user']) or item['uid']
                gid = group2gid(item['group']) or item['gid']
                try:
                    os.chown(path, uid, gid)
                except OSError:
                    pass
                os.utime(path, (item['ctime'], item['mtime']))

    def verify(self):
        for item in self.items:
            if item['type'] == 'FILE':
                item['path'] = item['path'].decode('utf-8')
                for chunk in item['chunks']:
                    id = self.chunk_idx[chunk]
                    data = self.store.get(NS_CHUNKS, id)
                    cid = data[:32]
                    data = data[32:]
                    if hashlib.sha256(data).digest() != cid:
                        logging.error('%s ... ERROR', item['path'])
                        break
                else:
                    logging.info('%s ... OK', item['path'])

    def delete(self, cache):
        self.store.delete(NS_ARCHIVES, self.cache.archives[self.name])
        for item in self.items:
            if item['type'] == 'FILE':
                for c in item['chunks']:
                    id = self.chunk_idx[c]
                    cache.chunk_decref(id)
        self.store.commit()
        del cache.archives[self.name]
        cache.save()

    def walk(self, path):
        st = os.lstat(path)
        if stat.S_ISDIR(st.st_mode):
            for f in os.listdir(path):
                for x in self.walk(os.path.join(path, f)):
                    yield x
        else:
            yield path, st

    def create(self, name, paths, cache):
        if name in cache.archives:
            raise NameError('Archive already exists')
        for path in paths:
            for path, st in self.walk(unicode(path)):
                if stat.S_ISDIR(st.st_mode):
                    self.process_dir(path, st)
                elif stat.S_ISLNK(st.st_mode):
                    self.process_link(path, st)
                elif stat.S_ISREG(st.st_mode):
                    self.process_file(path, st)
                else:
                    logging.error('Unknown file type: %s', path)
        self.save(name)
        cache.archives[name] = self.id
        cache.save()

    def process_dir(self, path, st):
        path = path.lstrip('/\\:')
        logging.info(path)
        self.items.append({'type': 'DIRECTORY', 'path': path})

    def process_link(self, path, st):
        source = os.readlink(path)
        path = path.lstrip('/\\:')
        logging.info('%s => %s', path, source)
        self.items.append({'type': 'SYMLINK', 'path': path, 'source': source})

    def process_file(self, path, st):
        try:
            fd = open(path, 'rb')
        except IOError, e:
            logging.error(e)
            return
        with fd:
            path = path.lstrip('/\\:')
            logging.info(path)
            chunks = []
            size = 0
            for chunk in chunkify(fd, CHUNK_SIZE, 30):
                size += len(chunk)
                chunks.append(self.add_chunk(*self.cache.add_chunk(chunk)))
            self.items.append({
                'type': 'FILE', 'path': path, 'chunks': chunks, 'size': size,
                'mode': st.st_mode,
                'uid': st.st_uid, 'user': uid2user(st.st_uid),
                'gid': st.st_gid, 'group': gid2group(st.st_gid),
                'ctime': st.st_ctime, 'mtime': st.st_mtime,
            })
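Both extract() and verify() above decode the same chunk framing: each blob stored under NS_CHUNKS is a 32-byte SHA-256 digest of the compressed payload followed by the zlib stream itself, and the digest is checked before anything is decompressed. A minimal standalone sketch of that framing; decode_chunk mirrors the checks above, while encode_chunk is a hypothetical inverse helper that is not part of this commit:

import hashlib
import zlib

def decode_chunk(blob):
    # Layout assumed from Archive.extract()/verify() above:
    # blob = 32-byte SHA-256 digest of the compressed payload + zlib stream.
    cid, payload = blob[:32], blob[32:]
    if hashlib.sha256(payload).digest() != cid:
        raise ValueError('Invalid chunk checksum')
    return cid, zlib.decompress(payload)

def encode_chunk(data):
    # Hypothetical inverse helper, shown only for symmetry.
    payload = zlib.compress(data)
    return hashlib.sha256(payload).digest() + payload

blob = encode_chunk(b'some file data')
cid, data = decode_chunk(blob)
assert data == b'some file data'

Because the digest covers the compressed bytes, a corrupted blob is rejected without ever being fed to zlib.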

dedupestore/archiver.py

@@ -1,210 +1,11 @@
-import os
-import hashlib
-import logging
-import zlib
 import argparse
+import logging
 import sys
-import stat
-from datetime import datetime
-import msgpack
-from .chunkifier import chunkify
-from .cache import Cache, NS_ARCHIVES, NS_CHUNKS
+from .archive import Archive
 from .bandstore import BandStore
-from .helpers import location_validator, pretty_size, LevelFilter, \
-    uid2user, user2uid, gid2group, group2gid
+from .cache import Cache
+from .helpers import location_validator, pretty_size, LevelFilter
-CHUNK_SIZE = 55001
-
-class Archive(object):
-    [... removed class body elided: it is identical, line for line, to the
-    new dedupestore/archive.py shown above ...]
-
 class Archiver(object):
@@ -215,10 +16,7 @@ def open_store(self, location):
         return store, cache

     def exit_code_from_logger(self):
-        if not self.level_filter.count.get('ERROR'):
-            return 0
-        else:
-            return 1
+        return 1 if self.level_filter.count.get('ERROR') else 0

     def do_create(self, args):
         store, cache = self.open_store(args.archive)

dedupestore/cache.py

@@ -1,7 +1,7 @@
 import hashlib
 import os
-import zlib
 import msgpack
+import zlib

 NS_ARCHIVES = 'A'
 NS_CHUNKS = 'C'
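The archive metadata itself is content-addressed the same way as the chunks: Archive.save() msgpack-encodes the metadata dict, zlib-compresses it, and stores it under the SHA-256 digest of the compressed blob, which Archive.open() recomputes as an integrity check. A minimal sketch of that round-trip, standalone and without the band store (the example values are made up):

import hashlib
import zlib

import msgpack

# Encode: metadata dict -> msgpack -> zlib -> content-addressed id.
archive = {'name': 'example', 'ts': '2010-10-20T19:59:15',
           'items': [], 'chunks': []}
data = zlib.compress(msgpack.packb(archive))
archive_id = hashlib.sha256(data).digest()

# Decode: recomputing the digest detects corruption before unpacking,
# mirroring the check in Archive.open().
if hashlib.sha256(data).digest() != archive_id:
    raise Exception('Archive hash did not match')
restored = msgpack.unpackb(zlib.decompress(data))

One consequence of this scheme is that the archive id is only known after save(), which is why create() stores it into cache.archives[name] afterwards.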