Mirror of https://github.com/borgbackup/borg.git (synced 2024-12-26 01:37:20 +00:00)
Split large archive item packages into multiple smaller ones
This commit is contained in:
parent 198b3f90fc
commit c9aca63d6d
2 changed files with 41 additions and 30 deletions
|
@@ -27,6 +27,7 @@ def __init__(self, store, keychain, name=None):
|
||||||
self.keychain = keychain
|
self.keychain = keychain
|
||||||
self.store = store
|
self.store = store
|
||||||
self.items = []
|
self.items = []
|
||||||
|
self.items_ids = []
|
||||||
self.hard_links = {}
|
self.hard_links = {}
|
||||||
if name:
|
if name:
|
||||||
self.load(self.keychain.id_hash(name))
|
self.load(self.keychain.id_hash(name))
|
||||||
|
@@ -40,17 +41,33 @@ def load(self, id):
|
||||||
self.metadata = msgpack.unpackb(data)
|
self.metadata = msgpack.unpackb(data)
|
||||||
assert self.metadata['version'] == 1
|
assert self.metadata['version'] == 1
|
||||||
|
|
||||||
def get_items(self):
|
def get_chunks(self):
|
||||||
data, chunks_hash = self.keychain.decrypt(self.store.get(NS_ARCHIVE_CHUNKS, self.id))
|
data, chunks_hash = self.keychain.decrypt(self.store.get(NS_ARCHIVE_CHUNKS, self.id))
|
||||||
chunks = msgpack.unpackb(data)
|
chunks = msgpack.unpackb(data)
|
||||||
assert chunks['version'] == 1
|
assert chunks['version'] == 1
|
||||||
assert self.metadata['chunks_hash'] == chunks_hash
|
assert self.metadata['chunks_hash'] == chunks_hash
|
||||||
self.chunks = chunks['chunks']
|
return chunks['chunks']
|
||||||
data, items_hash = self.keychain.decrypt(self.store.get(NS_ARCHIVE_ITEMS, self.id))
|
|
||||||
|
def get_items(self):
|
||||||
|
for id in self.metadata['items_ids']:
|
||||||
|
data, items_hash = self.keychain.decrypt(self.store.get(NS_ARCHIVE_ITEMS, id))
|
||||||
|
assert items_hash == id
|
||||||
items = msgpack.unpackb(data)
|
items = msgpack.unpackb(data)
|
||||||
assert items['version'] == 1
|
assert items['version'] == 1
|
||||||
assert self.metadata['items_hash'] == items_hash
|
for item in items['items']:
|
||||||
self.items = items['items']
|
yield item
|
||||||
|
|
||||||
|
def add_item(self, item):
|
||||||
|
self.items.append(item)
|
||||||
|
if len(self.items) > 100000:
|
||||||
|
self.flush_items()
|
||||||
|
|
||||||
|
def flush_items(self):
|
||||||
|
items = {'version': 1, 'items': self.items}
|
||||||
|
data, items_hash = self.keychain.encrypt_read(msgpack.packb(items))
|
||||||
|
self.store.put(NS_ARCHIVE_ITEMS, items_hash, data)
|
||||||
|
self.items = []
|
||||||
|
self.items_ids.append(items_hash)
|
||||||
|
|
||||||
def save(self, name, cache):
|
def save(self, name, cache):
|
||||||
self.id = self.keychain.id_hash(name)
|
self.id = self.keychain.id_hash(name)
|
||||||
|
@@ -58,14 +75,12 @@ def save(self, name, cache):
|
||||||
chunks = {'version': 1, 'chunks': self.chunks}
|
chunks = {'version': 1, 'chunks': self.chunks}
|
||||||
data, chunks_hash = self.keychain.encrypt_create(msgpack.packb(chunks))
|
data, chunks_hash = self.keychain.encrypt_create(msgpack.packb(chunks))
|
||||||
self.store.put(NS_ARCHIVE_CHUNKS, self.id, data)
|
self.store.put(NS_ARCHIVE_CHUNKS, self.id, data)
|
||||||
items = {'version': 1, 'items': self.items}
|
self.flush_items()
|
||||||
data, items_hash = self.keychain.encrypt_read(msgpack.packb(items))
|
|
||||||
self.store.put(NS_ARCHIVE_ITEMS, self.id, data)
|
|
||||||
metadata = {
|
metadata = {
|
||||||
'version': 1,
|
'version': 1,
|
||||||
'name': name,
|
'name': name,
|
||||||
'chunks_hash': chunks_hash,
|
'chunks_hash': chunks_hash,
|
||||||
'items_hash': items_hash,
|
'items_ids': self.items_ids,
|
||||||
'cmdline': sys.argv,
|
'cmdline': sys.argv,
|
||||||
'hostname': socket.gethostname(),
|
'hostname': socket.gethostname(),
|
||||||
'username': getuser(),
|
'username': getuser(),
|
||||||
|
@@ -76,12 +91,11 @@ def save(self, name, cache):
|
||||||
self.store.commit()
|
self.store.commit()
|
||||||
|
|
||||||
def stats(self, cache):
|
def stats(self, cache):
|
||||||
self.get_items()
|
|
||||||
osize = csize = usize = 0
|
osize = csize = usize = 0
|
||||||
for item in self.items:
|
for item in self.get_items():
|
||||||
if stat.S_ISREG(item['mode']) and not 'source' in item:
|
if stat.S_ISREG(item['mode']) and not 'source' in item:
|
||||||
osize += item['size']
|
osize += item['size']
|
||||||
for id, size in self.chunks:
|
for id, size in self.get_chunks():
|
||||||
csize += size
|
csize += size
|
||||||
if cache.seen_chunk(id) == 1:
|
if cache.seen_chunk(id) == 1:
|
||||||
usize += size
|
usize += size
|
||||||
|
@@ -167,11 +181,10 @@ def verify_file(self, item):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def delete(self, cache):
|
def delete(self, cache):
|
||||||
self.get_items()
|
|
||||||
self.store.delete(NS_ARCHIVE_CHUNKS, self.id)
|
self.store.delete(NS_ARCHIVE_CHUNKS, self.id)
|
||||||
self.store.delete(NS_ARCHIVE_ITEMS, self.id)
|
self.store.delete(NS_ARCHIVE_ITEMS, self.id)
|
||||||
self.store.delete(NS_ARCHIVE_METADATA, self.id)
|
self.store.delete(NS_ARCHIVE_METADATA, self.id)
|
||||||
for id, size in self.chunks:
|
for id, size in self.get_chunks():
|
||||||
cache.chunk_decref(id)
|
cache.chunk_decref(id)
|
||||||
self.store.commit()
|
self.store.commit()
|
||||||
cache.save()
|
cache.save()
|
||||||
|
@@ -200,18 +213,18 @@ def stat_attrs(self, st, path):
|
||||||
def process_dir(self, path, st):
|
def process_dir(self, path, st):
|
||||||
item = {'path': path.lstrip('/\\:')}
|
item = {'path': path.lstrip('/\\:')}
|
||||||
item.update(self.stat_attrs(st, path))
|
item.update(self.stat_attrs(st, path))
|
||||||
self.items.append(item)
|
self.add_item(item)
|
||||||
|
|
||||||
def process_fifo(self, path, st):
|
def process_fifo(self, path, st):
|
||||||
item = {'path': path.lstrip('/\\:')}
|
item = {'path': path.lstrip('/\\:')}
|
||||||
item.update(self.stat_attrs(st, path))
|
item.update(self.stat_attrs(st, path))
|
||||||
self.items.append(item)
|
self.add_item(item)
|
||||||
|
|
||||||
def process_symlink(self, path, st):
|
def process_symlink(self, path, st):
|
||||||
source = os.readlink(path)
|
source = os.readlink(path)
|
||||||
item = {'path': path.lstrip('/\\:'), 'source': source}
|
item = {'path': path.lstrip('/\\:'), 'source': source}
|
||||||
item.update(self.stat_attrs(st, path))
|
item.update(self.stat_attrs(st, path))
|
||||||
self.items.append(item)
|
self.add_item(item)
|
||||||
|
|
||||||
def process_file(self, path, st, cache):
|
def process_file(self, path, st, cache):
|
||||||
safe_path = path.lstrip('/\\:')
|
safe_path = path.lstrip('/\\:')
|
||||||
|
@@ -219,9 +232,8 @@ def process_file(self, path, st, cache):
|
||||||
if st.st_nlink > 1:
|
if st.st_nlink > 1:
|
||||||
source = self.hard_links.get((st.st_ino, st.st_dev))
|
source = self.hard_links.get((st.st_ino, st.st_dev))
|
||||||
if (st.st_ino, st.st_dev) in self.hard_links:
|
if (st.st_ino, st.st_dev) in self.hard_links:
|
||||||
self.items.append({'mode': st.st_mode,
|
self.add_item({'mode': st.st_mode,
|
||||||
'path': path,
|
'path': path, 'source': source})
|
||||||
'source': source})
|
|
||||||
return
|
return
|
||||||
else:
|
else:
|
||||||
self.hard_links[st.st_ino, st.st_dev] = safe_path
|
self.hard_links[st.st_ino, st.st_dev] = safe_path
|
||||||
|
@@ -248,7 +260,7 @@ def process_file(self, path, st, cache):
|
||||||
cache.memorize_file_chunks(path_hash, st, ids)
|
cache.memorize_file_chunks(path_hash, st, ids)
|
||||||
item = {'path': safe_path, 'chunks': ids, 'size': size}
|
item = {'path': safe_path, 'chunks': ids, 'size': size}
|
||||||
item.update(self.stat_attrs(st, path))
|
item.update(self.stat_attrs(st, path))
|
||||||
self.items.append(item)
|
self.add_item(item)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def list_archives(store, keychain):
|
def list_archives(store, keychain):
|
||||||
|
|
|
@@ -114,9 +114,8 @@ def do_extract(self, args):
|
||||||
store = self.open_store(args.archive)
|
store = self.open_store(args.archive)
|
||||||
keychain = Keychain(args.keychain)
|
keychain = Keychain(args.keychain)
|
||||||
archive = Archive(store, keychain, args.archive.archive)
|
archive = Archive(store, keychain, args.archive.archive)
|
||||||
archive.get_items()
|
|
||||||
dirs = []
|
dirs = []
|
||||||
for item in archive.items:
|
for item in archive.get_items():
|
||||||
if exclude_path(item['path'], args.patterns):
|
if exclude_path(item['path'], args.patterns):
|
||||||
continue
|
continue
|
||||||
self.print_verbose(item['path'].decode('utf-8'))
|
self.print_verbose(item['path'].decode('utf-8'))
|
||||||
|
@@ -144,8 +143,7 @@ def do_list(self, args):
|
||||||
if args.src.archive:
|
if args.src.archive:
|
||||||
tmap = {1: 'p', 2: 'c', 4: 'd', 6: 'b', 010: '-', 012: 'l', 014: 's'}
|
tmap = {1: 'p', 2: 'c', 4: 'd', 6: 'b', 010: '-', 012: 'l', 014: 's'}
|
||||||
archive = Archive(store, keychain, args.src.archive)
|
archive = Archive(store, keychain, args.src.archive)
|
||||||
archive.get_items()
|
for item in archive.get_items():
|
||||||
for item in archive.items:
|
|
||||||
type = tmap.get(item['mode'] / 4096, '?')
|
type = tmap.get(item['mode'] / 4096, '?')
|
||||||
mode = format_file_mode(item['mode'])
|
mode = format_file_mode(item['mode'])
|
||||||
size = item.get('size', 0)
|
size = item.get('size', 0)
|
||||||
|
@@ -158,11 +156,12 @@ def do_list(self, args):
|
||||||
return self.exit_code
|
return self.exit_code
|
||||||
|
|
||||||
def do_verify(self, args):
|
def do_verify(self, args):
|
||||||
|
import ipdb
|
||||||
|
ipdb.set_trace()
|
||||||
store = self.open_store(args.archive)
|
store = self.open_store(args.archive)
|
||||||
keychain = Keychain(args.keychain)
|
keychain = Keychain(args.keychain)
|
||||||
archive = Archive(store, keychain, args.archive.archive)
|
archive = Archive(store, keychain, args.archive.archive)
|
||||||
archive.get_items()
|
for item in archive.get_items():
|
||||||
for item in archive.items:
|
|
||||||
if stat.S_ISREG(item['mode']) and not 'source' in item:
|
if stat.S_ISREG(item['mode']) and not 'source' in item:
|
||||||
self.print_verbose('%s ...', item['path'].decode('utf-8'), newline=False)
|
self.print_verbose('%s ...', item['path'].decode('utf-8'), newline=False)
|
||||||
if archive.verify_file(item):
|
if archive.verify_file(item):
|
||||||
|
@@ -183,7 +182,7 @@ def do_info(self, args):
|
||||||
print 'Username:', archive.metadata['username']
|
print 'Username:', archive.metadata['username']
|
||||||
print 'Time:', archive.metadata['time']
|
print 'Time:', archive.metadata['time']
|
||||||
print 'Command line:', ' '.join(archive.metadata['cmdline'])
|
print 'Command line:', ' '.join(archive.metadata['cmdline'])
|
||||||
print 'Number of Files:', len(archive.items)
|
print 'Number of Files:', len(archive.get_items())
|
||||||
print 'Original size:', format_file_size(osize)
|
print 'Original size:', format_file_size(osize)
|
||||||
print 'Compressed size:', format_file_size(csize)
|
print 'Compressed size:', format_file_size(csize)
|
||||||
print 'Unique data:', format_file_size(usize)
|
print 'Unique data:', format_file_size(usize)
|
||||||
|
|
Loading…
Reference in a new issue