
Split large archive item packages into multiple smaller ones

Jonas Borgström 2010-11-29 21:08:37 +01:00
parent 198b3f90fc
commit c9aca63d6d
2 changed files with 41 additions and 30 deletions
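
The change replaces the single monolithic NS_ARCHIVE_ITEMS object with a
series of smaller packages: items are buffered in memory, flushed to the
store once the buffer exceeds 100,000 entries, each package is stored under
its own content hash, and the archive metadata records the ordered list of
package ids. A minimal sketch of that scheme, using hashlib and a plain
dict as hypothetical stand-ins for the real keychain and store objects:

import hashlib
import msgpack

MAX_ITEMS_PER_PACKAGE = 100000  # same threshold add_item() uses below

class ItemBuffer(object):
    def __init__(self, store):
        self.store = store      # hypothetical: maps package id -> bytes
        self.items = []         # in-memory buffer of pending items
        self.items_ids = []     # ids of flushed packages, in order

    def add_item(self, item):
        # Buffer the item; flush once the package grows too large.
        self.items.append(item)
        if len(self.items) > MAX_ITEMS_PER_PACKAGE:
            self.flush_items()

    def flush_items(self):
        # Pack the buffered items, key the package by its content hash,
        # and remember the id so the archive metadata can list it.
        data = msgpack.packb({'version': 1, 'items': self.items})
        items_id = hashlib.sha256(data).digest()
        self.store[items_id] = data
        self.items = []
        self.items_ids.append(items_id)

    def get_items(self):
        # Stream items back one package at a time instead of loading
        # the whole archive listing into memory at once.
        for items_id in self.items_ids:
            items = msgpack.unpackb(self.store[items_id])
            assert items['version'] == 1
            for item in items['items']:
                yield item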

View file

@@ -27,6 +27,7 @@ def __init__(self, store, keychain, name=None):
         self.keychain = keychain
         self.store = store
         self.items = []
+        self.items_ids = []
         self.hard_links = {}
         if name:
             self.load(self.keychain.id_hash(name))
@@ -40,17 +41,33 @@ def load(self, id):
         self.metadata = msgpack.unpackb(data)
         assert self.metadata['version'] == 1

-    def get_items(self):
+    def get_chunks(self):
         data, chunks_hash = self.keychain.decrypt(self.store.get(NS_ARCHIVE_CHUNKS, self.id))
         chunks = msgpack.unpackb(data)
         assert chunks['version'] == 1
         assert self.metadata['chunks_hash'] == chunks_hash
-        self.chunks = chunks['chunks']
-        data, items_hash = self.keychain.decrypt(self.store.get(NS_ARCHIVE_ITEMS, self.id))
-        items = msgpack.unpackb(data)
-        assert items['version'] == 1
-        assert self.metadata['items_hash'] == items_hash
-        self.items = items['items']
+        return chunks['chunks']
+
+    def get_items(self):
+        for id in self.metadata['items_ids']:
+            data, items_hash = self.keychain.decrypt(self.store.get(NS_ARCHIVE_ITEMS, id))
+            assert items_hash == id
+            items = msgpack.unpackb(data)
+            assert items['version'] == 1
+            for item in items['items']:
+                yield item
+
+    def add_item(self, item):
+        self.items.append(item)
+        if len(self.items) > 100000:
+            self.flush_items()
+
+    def flush_items(self):
+        items = {'version': 1, 'items': self.items}
+        data, items_hash = self.keychain.encrypt_read(msgpack.packb(items))
+        self.store.put(NS_ARCHIVE_ITEMS, items_hash, data)
+        self.items = []
+        self.items_ids.append(items_hash)

     def save(self, name, cache):
         self.id = self.keychain.id_hash(name)
@@ -58,14 +75,12 @@ def save(self, name, cache):
         chunks = {'version': 1, 'chunks': self.chunks}
         data, chunks_hash = self.keychain.encrypt_create(msgpack.packb(chunks))
         self.store.put(NS_ARCHIVE_CHUNKS, self.id, data)
-        items = {'version': 1, 'items': self.items}
-        data, items_hash = self.keychain.encrypt_read(msgpack.packb(items))
-        self.store.put(NS_ARCHIVE_ITEMS, self.id, data)
+        self.flush_items()
         metadata = {
             'version': 1,
             'name': name,
             'chunks_hash': chunks_hash,
-            'items_hash': items_hash,
+            'items_ids': self.items_ids,
             'cmdline': sys.argv,
             'hostname': socket.gethostname(),
             'username': getuser(),
@@ -76,12 +91,11 @@ def save(self, name, cache):
         self.store.commit()

     def stats(self, cache):
-        self.get_items()
         osize = csize = usize = 0
-        for item in self.items:
+        for item in self.get_items():
             if stat.S_ISREG(item['mode']) and not 'source' in item:
                 osize += item['size']
-        for id, size in self.chunks:
+        for id, size in self.get_chunks():
             csize += size
             if cache.seen_chunk(id) == 1:
                 usize += size
@@ -167,11 +181,10 @@ def verify_file(self, item):
             return True

     def delete(self, cache):
-        self.get_items()
         self.store.delete(NS_ARCHIVE_CHUNKS, self.id)
         self.store.delete(NS_ARCHIVE_ITEMS, self.id)
         self.store.delete(NS_ARCHIVE_METADATA, self.id)
-        for id, size in self.chunks:
+        for id, size in self.get_chunks():
             cache.chunk_decref(id)
         self.store.commit()
         cache.save()
@@ -200,18 +213,18 @@ def stat_attrs(self, st, path):
     def process_dir(self, path, st):
         item = {'path': path.lstrip('/\\:')}
         item.update(self.stat_attrs(st, path))
-        self.items.append(item)
+        self.add_item(item)

     def process_fifo(self, path, st):
         item = {'path': path.lstrip('/\\:')}
         item.update(self.stat_attrs(st, path))
-        self.items.append(item)
+        self.add_item(item)

     def process_symlink(self, path, st):
         source = os.readlink(path)
         item = {'path': path.lstrip('/\\:'), 'source': source}
         item.update(self.stat_attrs(st, path))
-        self.items.append(item)
+        self.add_item(item)

     def process_file(self, path, st, cache):
         safe_path = path.lstrip('/\\:')
@@ -219,9 +232,8 @@ def process_file(self, path, st, cache):
         if st.st_nlink > 1:
             source = self.hard_links.get((st.st_ino, st.st_dev))
             if (st.st_ino, st.st_dev) in self.hard_links:
-                self.items.append({'mode': st.st_mode,
-                                   'path': path,
-                                   'source': source})
+                self.add_item({'mode': st.st_mode,
+                               'path': path, 'source': source})
                 return
             else:
                 self.hard_links[st.st_ino, st.st_dev] = safe_path
@@ -248,7 +260,7 @@ def process_file(self, path, st, cache):
         cache.memorize_file_chunks(path_hash, st, ids)
         item = {'path': safe_path, 'chunks': ids, 'size': size}
         item.update(self.stat_attrs(st, path))
-        self.items.append(item)
+        self.add_item(item)

     @staticmethod
     def list_archives(store, keychain):
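
On the read side, get_items() is now a generator that decrypts and unpacks
one package at a time, so callers iterate over it directly instead of
materializing archive.items. A usage sketch under that assumption
(count_regular_files is a hypothetical helper mirroring the stats() loop
above):

import stat

def count_regular_files(archive):
    # Only one item package is decoded at a time, so memory stays
    # bounded even for archives with millions of entries.
    return sum(1 for item in archive.get_items()
               if stat.S_ISREG(item['mode']) and 'source' not in item)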

View file

@@ -114,9 +114,8 @@ def do_extract(self, args):
         store = self.open_store(args.archive)
         keychain = Keychain(args.keychain)
         archive = Archive(store, keychain, args.archive.archive)
-        archive.get_items()
         dirs = []
-        for item in archive.items:
+        for item in archive.get_items():
             if exclude_path(item['path'], args.patterns):
                 continue
             self.print_verbose(item['path'].decode('utf-8'))
@@ -144,8 +143,7 @@ def do_list(self, args):
         if args.src.archive:
             tmap = {1: 'p', 2: 'c', 4: 'd', 6: 'b', 010: '-', 012: 'l', 014: 's'}
             archive = Archive(store, keychain, args.src.archive)
-            archive.get_items()
-            for item in archive.items:
+            for item in archive.get_items():
                 type = tmap.get(item['mode'] / 4096, '?')
                 mode = format_file_mode(item['mode'])
                 size = item.get('size', 0)
@@ -158,11 +156,12 @@ def do_list(self, args):
         return self.exit_code

     def do_verify(self, args):
+        import ipdb
+        ipdb.set_trace()
         store = self.open_store(args.archive)
         keychain = Keychain(args.keychain)
         archive = Archive(store, keychain, args.archive.archive)
-        archive.get_items()
-        for item in archive.items:
+        for item in archive.get_items():
             if stat.S_ISREG(item['mode']) and not 'source' in item:
                 self.print_verbose('%s ...', item['path'].decode('utf-8'), newline=False)
                 if archive.verify_file(item):
@@ -183,7 +182,7 @@ def do_info(self, args):
         print 'Username:', archive.metadata['username']
         print 'Time:', archive.metadata['time']
         print 'Command line:', ' '.join(archive.metadata['cmdline'])
-        print 'Number of Files:', len(archive.items)
+        print 'Number of Files:', len(archive.get_items())
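
One caveat in the last hunk: get_items() now yields lazily, and len() on a
generator raises TypeError, so do_info would need to consume the iterator
to count files, along these lines (archive as in the code above):

num_files = sum(1 for _ in archive.get_items())
print 'Number of Files:', num_files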