1
0
Fork 0
mirror of https://github.com/borgbackup/borg.git synced 2024-12-24 08:45:13 +00:00

More optimizations

This commit is contained in:
Jonas Borgström 2011-08-02 21:45:21 +02:00
parent 2d47afe368
commit 2d6df6454e
3 changed files with 70 additions and 31 deletions

View file

@ -6,12 +6,12 @@
import socket import socket
import stat import stat
import sys import sys
from os.path import dirname from zlib import crc32
from xattr import xattr, XATTR_NOFOLLOW from xattr import xattr, XATTR_NOFOLLOW
from . import NS_ARCHIVE_METADATA, NS_CHUNK from . import NS_ARCHIVE_METADATA, NS_CHUNK
from ._speedups import chunkify from ._speedups import chunkify
from .helpers import uid2user, user2uid, gid2group, group2gid, IntegrityError from .helpers import uid2user, user2uid, gid2group, group2gid, IntegrityError, Counter
CHUNK_SIZE = 64 * 1024 CHUNK_SIZE = 64 * 1024
WINDOW_SIZE = 4096 WINDOW_SIZE = 4096
@ -31,7 +31,6 @@ def __init__(self, store, key, name=None, cache=None):
self.cache = cache self.cache = cache
self.items = '' self.items = ''
self.items_refs = [] self.items_refs = []
self.items_prefix = ''
self.items_ids = [] self.items_ids = []
self.hard_links = {} self.hard_links = {}
if name: if name:
@ -54,28 +53,33 @@ def ts(self):
def iter_items(self, callback):
    """Fetch the archive's item-metadata chunks and feed each item to *callback*.

    One ``self.store.get`` request is issued per entry of
    ``self.metadata['items']``.  Each reply is decrypted, verified against
    the chunk id it was requested under, and streamed through a msgpack
    unpacker; ``callback(item)`` is invoked for every item decoded so far.

    This method only throttles the requests; it does not wait for the
    final replies before returning.
    NOTE(review): presumably the caller flushes the store afterwards -- confirm.
    """
    unpacker = msgpack.Unpacker()
    # Number of chunk requests issued here whose reply has not been handled.
    pending = Counter(0)

    def cb(chunk, error, id):
        pending.dec()
        data, _items_hash = self.key.decrypt(chunk)
        assert self.key.id_hash(data) == id
        unpacker.feed(data)
        for item in unpacker:
            callback(item)

    for id, size, csize in self.metadata['items']:
        # Block until at most 10 of our requests are still outstanding
        # before issuing the next one.
        self.store.flush_rpc(pending, 10)
        pending.inc()
        self.store.get(NS_CHUNK, id, callback=cb, callback_data=id)
def add_item(self, item, refs=None):
    """Append *item* (msgpack-encoded) to the pending items buffer.

    Before appending, the buffer accumulated so far is flushed whenever the
    CRC32 of the item's UTF-8 encoded path is a multiple of 1000, so the
    flush points depend only on the paths themselves.  Any *refs* given are
    added to ``self.items_refs``.
    """
    packed = msgpack.packb(item)
    path_crc = crc32(item['path'].encode('utf-8'))
    if path_crc % 1000 == 0:
        self.flush_items()
    if refs:
        self.items_refs += refs
    self.items += packed
def flush_items(self): def flush_items(self):
if not self.items: if not self.items:
return return
print 'flush', len(self.items)
id = self.key.id_hash(self.items) id = self.key.id_hash(self.items)
if self.cache.seen_chunk(id): if self.cache.seen_chunk(id):
self.items_ids.append(self.cache.chunk_incref(id)) self.items_ids.append(self.cache.chunk_incref(id))
@ -85,7 +89,6 @@ def flush_items(self):
self.items_ids.append(self.cache.add_chunk(id, self.items)) self.items_ids.append(self.cache.add_chunk(id, self.items))
self.items = '' self.items = ''
self.items_refs = [] self.items_refs = []
self.items_prefix = ''
def save(self, name, cache): def save(self, name, cache):
self.id = self.key.archive_hash(name) self.id = self.key.archive_hash(name)
@ -171,27 +174,28 @@ def extract_item(self, item, dest=None, start_cb=None):
os.unlink(path) os.unlink(path)
os.link(source, path) os.link(source, path)
else: else:
def extract_cb(chunk, error, (id, i, last)): def extract_cb(chunk, error, (id, i)):
if i==0: if i == 0:
state['fd'] = open(path, 'wb')
start_cb(item) start_cb(item)
assert not error assert not error
data, hash = self.key.decrypt(chunk) data, hash = self.key.decrypt(chunk)
if self.key.id_hash(data) != id: if self.key.id_hash(data) != id:
raise IntegrityError('chunk hash did not match') raise IntegrityError('chunk hash did not match')
fd.write(data) state['fd'].write(data)
if last: if i == n - 1:
fd.close() state['fd'].close()
self.restore_attrs(path, item) self.restore_attrs(path, item)
state = {}
fd = open(path, 'wb')
n = len(item['chunks']) n = len(item['chunks'])
## 0 chunks indicates an empty (0 bytes) file
if n == 0: if n == 0:
open(path, 'wb').close()
start_cb(item) start_cb(item)
self.restore_attrs(path, item) self.restore_attrs(path, item)
fd.close()
else: else:
for i, (id, size, csize) in enumerate(item['chunks']): for i, (id, size, csize) in enumerate(item['chunks']):
self.store.get(NS_CHUNK, id, callback=extract_cb, callback_data=(id, i, i==n-1)) self.store.get(NS_CHUNK, id, callback=extract_cb, callback_data=(id, i))
else: else:
raise Exception('Unknown archive item type %r' % item['mode']) raise Exception('Unknown archive item type %r' % item['mode'])

View file

@ -10,6 +10,25 @@
import struct import struct
import time import time
class Counter(object):
    """Mutable numeric cell that can be shared and compared by value.

    The current value is stored in ``v``; ``inc``/``dec`` adjust it in
    place, so every holder of the same instance sees the change.
    Instances compare against plain numbers (and other counters) by value,
    e.g. ``counter > backlog``.

    No ``__nonzero__``/``__bool__``/``__len__`` is defined on purpose: a
    Counter is always truthy, even when it holds zero, so callers that
    write ``counter or default`` keep an explicitly passed counter.
    """
    __slots__ = ('v',)

    # Defining __eq__ would disable hashing on Python 3; keep the identity
    # hash a plain object has.
    __hash__ = object.__hash__

    def __init__(self, value=0):
        self.v = value

    def inc(self, amount=1):
        """Increase the value by *amount*."""
        self.v += amount

    def dec(self, amount=1):
        """Decrease the value by *amount*."""
        self.v -= amount

    def __cmp__(self, x):
        # Same result as the ``cmp`` builtin (-1, 0 or 1) without needing
        # it, since ``cmp`` does not exist on Python 3.
        return (self.v > x) - (self.v < x)

    # Explicit rich comparisons: Python 3 ignores __cmp__ entirely.
    def __eq__(self, x):
        return self.v == x

    def __ne__(self, x):
        return self.v != x

    def __lt__(self, x):
        return self.v < x

    def __le__(self, x):
        return self.v <= x

    def __gt__(self, x):
        return self.v > x

    def __ge__(self, x):
        return self.v >= x

    def __repr__(self):
        return '<Counter(%r)>' % self.v
def deferrable(f): def deferrable(f):
def wrapper(*args, **kw): def wrapper(*args, **kw):

View file

@ -7,6 +7,7 @@
import getpass import getpass
from .store import Store from .store import Store
from .helpers import Counter
BUFSIZE = 1024 * 1024 BUFSIZE = 1024 * 1024
@ -16,10 +17,10 @@ class ChannelNotifyer(object):
def __init__(self, channel): def __init__(self, channel):
self.channel = channel self.channel = channel
self.enabled = 0 self.enabled = Counter()
def set(self): def set(self):
if self.enabled: if self.enabled > 0:
with self.channel.lock: with self.channel.lock:
self.channel.out_buffer_cv.notifyAll() self.channel.out_buffer_cv.notifyAll()
@ -106,6 +107,8 @@ def __init__(self, location, create=False):
self.channel.exec_command('darc serve') self.channel.exec_command('darc serve')
self.callbacks = {} self.callbacks = {}
self.msgid = 0 self.msgid = 0
self.recursion = 0
self.odata = ''
self.id, self.tid = self.cmd('open', (location.path, create)) self.id, self.tid = self.cmd('open', (location.path, create))
def wait(self, write=True): def wait(self, write=True):
@ -113,39 +116,46 @@ def wait(self, write=True):
if ((not write or self.channel.out_window_size == 0) and if ((not write or self.channel.out_window_size == 0) and
len(self.channel.in_buffer._buffer) == 0 and len(self.channel.in_buffer._buffer) == 0 and
len(self.channel.in_stderr_buffer._buffer) == 0): len(self.channel.in_stderr_buffer._buffer) == 0):
self.channel.out_buffer_cv.wait(10) self.channel.out_buffer_cv.wait(1)
def cmd(self, cmd, args, callback=None, callback_data=None):
    """Queue an RPC request and drive the channel until it is dealt with.

    The request is appended to ``self.odata``.  Without *callback* the call
    is synchronous: it loops until the reply carrying the latest message id
    arrives and returns its result.  With *callback* the call returns
    ``None`` as soon as the outgoing buffer has been fully sent; the reply
    is delivered later as ``callback(result, error, callback_data)``.

    When a callback request is issued while another ``cmd`` call is already
    running (i.e. from inside a reply callback), it returns immediately and
    leaves the queued bytes for a running ``cmd``/``flush_rpc`` loop to
    send.

    Raises:
        Exception: the channel has been closed.
        self.RPCError: the reply to a synchronous request carried an error.
    """
    self.msgid += 1
    self.notifier.enabled.inc()
    self.odata += msgpack.packb((0, self.msgid, cmd, args))
    self.recursion += 1
    # The nesting depth must be restored on *every* exit path.  Previously
    # an RPCError (or any exception from a callback or the channel) left
    # it incremented, which made all later callback requests take the
    # early-return branch below without ever driving the channel.
    try:
        if callback:
            self.callbacks[self.msgid] = callback, callback_data
            if self.recursion > 1:
                return
        while True:
            if self.channel.closed:
                raise Exception('Connection closed')
            elif self.channel.recv_stderr_ready():
                print >> sys.stderr, 'remote stderr:', self.channel.recv_stderr(BUFSIZE)
            elif self.channel.recv_ready():
                self.unpacker.feed(self.channel.recv(BUFSIZE))
                for type, msgid, error, res in self.unpacker:
                    self.notifier.enabled.dec()
                    if msgid == self.msgid:
                        if error:
                            raise self.RPCError(error)
                        return res
                    else:
                        c, d = self.callbacks.pop(msgid, (None, None))
                        if c:
                            c(res, error, d)
            elif self.odata and self.channel.send_ready():
                n = self.channel.send(self.odata)
                if n > 0:
                    self.odata = self.odata[n:]
                if not self.odata and callback:
                    return
            else:
                self.wait(self.odata)
    finally:
        self.recursion -= 1
def commit(self, *args): def commit(self, *args):
self.cmd('commit', args) self.cmd('commit', args)
@ -176,20 +186,26 @@ def delete(self, ns, id, callback=None, callback_data=None):
def list(self, *args): def list(self, *args):
return self.cmd('list', args) return self.cmd('list', args)
def flush_rpc(self): def flush_rpc(self, counter=None, backlog=0):
while True: counter = counter or self.notifier.enabled
while counter > backlog:
if self.channel.closed: if self.channel.closed:
raise Exception('Connection closed') raise Exception('Connection closed')
elif self.odata and self.channel.send_ready():
n = self.channel.send(self.odata)
if n > 0:
self.odata = self.odata[n:]
elif self.channel.recv_stderr_ready(): elif self.channel.recv_stderr_ready():
print >> sys.stderr, 'remote stderr:', self.channel.recv_stderr(BUFSIZE) print >> sys.stderr, 'remote stderr:', self.channel.recv_stderr(BUFSIZE)
elif self.channel.recv_ready(): elif self.channel.recv_ready():
self.unpacker.feed(self.channel.recv(BUFSIZE)) self.unpacker.feed(self.channel.recv(BUFSIZE))
for type, msgid, error, res in self.unpacker: for type, msgid, error, res in self.unpacker:
self.notifier.enabled -= 1 self.notifier.enabled.dec()
c, d = self.callbacks.pop(msgid, (None, None)) c, d = self.callbacks.pop(msgid, (None, None))
if c: if c:
c(res, error, d) c(res, error, d)
if msgid == self.msgid: if msgid == self.msgid:
return return
else: else:
self.wait() self.wait(self.odata)