mirror of
https://github.com/borgbackup/borg.git
synced 2024-12-27 02:08:54 +00:00
chunks index resync: do all in one pass
If we do not have a cached archive index: fetch, build, and merge it. If we have one: merge it.
This commit is contained in:
parent
22dd925986
commit
54ccbc5ae2
1 changed files with 51 additions and 49 deletions
100
borg/cache.py
100
borg/cache.py
|
@ -212,6 +212,23 @@ def sync(self):
|
||||||
"""
|
"""
|
||||||
archive_path = os.path.join(self.path, 'chunks.archive.d')
|
archive_path = os.path.join(self.path, 'chunks.archive.d')
|
||||||
|
|
||||||
|
def mkpath(id, suffix=''):
|
||||||
|
id_hex = hexlify(id).decode('ascii')
|
||||||
|
path = os.path.join(archive_path, id_hex + suffix)
|
||||||
|
return path.encode('utf-8')
|
||||||
|
|
||||||
|
def cached_archives():
|
||||||
|
fns = os.listdir(archive_path)
|
||||||
|
# filenames with 64 hex digits == 256bit
|
||||||
|
return set(unhexlify(fn) for fn in fns if len(fn) == 64)
|
||||||
|
|
||||||
|
def repo_archives():
|
||||||
|
return set(info[b'id'] for info in self.manifest.archives.values())
|
||||||
|
|
||||||
|
def cleanup_outdated(ids):
|
||||||
|
for id in ids:
|
||||||
|
os.unlink(mkpath(id))
|
||||||
|
|
||||||
def add(chunk_idx, id, size, csize, incr=1):
|
def add(chunk_idx, id, size, csize, incr=1):
|
||||||
try:
|
try:
|
||||||
count, size, csize = chunk_idx[id]
|
count, size, csize = chunk_idx[id]
|
||||||
|
@ -219,20 +236,6 @@ def add(chunk_idx, id, size, csize, incr=1):
|
||||||
except KeyError:
|
except KeyError:
|
||||||
chunk_idx[id] = incr, size, csize
|
chunk_idx[id] = incr, size, csize
|
||||||
|
|
||||||
def mkpath(id, suffix=''):
|
|
||||||
path = os.path.join(archive_path, id + suffix)
|
|
||||||
return path.encode('utf-8')
|
|
||||||
|
|
||||||
def list_archives():
|
|
||||||
fns = os.listdir(archive_path)
|
|
||||||
# only return filenames that are 64 hex digits (256bit)
|
|
||||||
return [fn for fn in fns if len(fn) == 64]
|
|
||||||
|
|
||||||
def cleanup_outdated(ids):
|
|
||||||
for id in ids:
|
|
||||||
id_hex = hexlify(id).decode('ascii')
|
|
||||||
os.unlink(mkpath(id_hex))
|
|
||||||
|
|
||||||
def fetch_and_build_idx(archive_id, repository, key):
|
def fetch_and_build_idx(archive_id, repository, key):
|
||||||
chunk_idx = ChunkIndex()
|
chunk_idx = ChunkIndex()
|
||||||
cdata = repository.get(archive_id)
|
cdata = repository.get(archive_id)
|
||||||
|
@ -242,7 +245,6 @@ def fetch_and_build_idx(archive_id, repository, key):
|
||||||
if archive[b'version'] != 1:
|
if archive[b'version'] != 1:
|
||||||
raise Exception('Unknown archive metadata version')
|
raise Exception('Unknown archive metadata version')
|
||||||
decode_dict(archive, (b'name',))
|
decode_dict(archive, (b'name',))
|
||||||
print('Analyzing new archive:', archive[b'name'])
|
|
||||||
unpacker = msgpack.Unpacker()
|
unpacker = msgpack.Unpacker()
|
||||||
for item_id, chunk in zip(archive[b'items'], repository.get_many(archive[b'items'])):
|
for item_id, chunk in zip(archive[b'items'], repository.get_many(archive[b'items'])):
|
||||||
data = key.decrypt(item_id, chunk)
|
data = key.decrypt(item_id, chunk)
|
||||||
|
@ -255,33 +257,43 @@ def fetch_and_build_idx(archive_id, repository, key):
|
||||||
if b'chunks' in item:
|
if b'chunks' in item:
|
||||||
for chunk_id, size, csize in item[b'chunks']:
|
for chunk_id, size, csize in item[b'chunks']:
|
||||||
add(chunk_idx, chunk_id, size, csize)
|
add(chunk_idx, chunk_id, size, csize)
|
||||||
archive_id_hex = hexlify(archive_id).decode('ascii')
|
fn = mkpath(archive_id)
|
||||||
fn = mkpath(archive_id_hex)
|
fn_tmp = mkpath(archive_id, suffix='.tmp')
|
||||||
fn_tmp = mkpath(archive_id_hex, suffix='.tmp')
|
|
||||||
try:
|
try:
|
||||||
chunk_idx.write(fn_tmp)
|
chunk_idx.write(fn_tmp)
|
||||||
except Exception:
|
except Exception:
|
||||||
os.unlink(fn_tmp)
|
os.unlink(fn_tmp)
|
||||||
else:
|
else:
|
||||||
os.rename(fn_tmp, fn)
|
os.rename(fn_tmp, fn)
|
||||||
|
return chunk_idx
|
||||||
|
|
||||||
|
def lookup_name(archive_id):
|
||||||
|
for name, info in self.manifest.archives.items():
|
||||||
|
if info[b'id'] == archive_id:
|
||||||
|
return name
|
||||||
|
|
||||||
def create_master_idx(chunk_idx):
|
def create_master_idx(chunk_idx):
|
||||||
|
print('Synchronizing chunks cache...')
|
||||||
|
cached_ids = cached_archives()
|
||||||
|
archive_ids = repo_archives()
|
||||||
|
print('Archives: %d, w/ cached Idx: %d, w/ outdated Idx: %d, w/o cached Idx: %d.' % (
|
||||||
|
len(archive_ids), len(cached_ids),
|
||||||
|
len(cached_ids - archive_ids), len(archive_ids - cached_ids), ))
|
||||||
# deallocates old hashindex, creates empty hashindex:
|
# deallocates old hashindex, creates empty hashindex:
|
||||||
chunk_idx.clear()
|
chunk_idx.clear()
|
||||||
archives = list_archives()
|
cleanup_outdated(cached_ids - archive_ids)
|
||||||
if archives:
|
if archive_ids:
|
||||||
chunk_idx = None
|
chunk_idx = None
|
||||||
for fn in archives:
|
for archive_id in archive_ids:
|
||||||
archive_id_hex = fn
|
archive_name = lookup_name(archive_id)
|
||||||
archive_id = unhexlify(archive_id_hex)
|
if archive_id in cached_ids:
|
||||||
for name, info in self.manifest.archives.items():
|
archive_chunk_idx_path = mkpath(archive_id)
|
||||||
if info[b'id'] == archive_id:
|
print("Reading cached archive chunk index for %s ..." % archive_name)
|
||||||
archive_name = name
|
archive_chunk_idx = ChunkIndex.read(archive_chunk_idx_path)
|
||||||
break
|
else:
|
||||||
archive_chunk_idx_path = mkpath(archive_id_hex)
|
print('Fetching and building archive index for %s ...' % archive_name)
|
||||||
print("- reading archive %s ..." % archive_name)
|
archive_chunk_idx = fetch_and_build_idx(archive_id, repository, self.key)
|
||||||
archive_chunk_idx = ChunkIndex.read(archive_chunk_idx_path)
|
print("Merging into master chunks index ...")
|
||||||
print("- merging archive ...")
|
|
||||||
if chunk_idx is None:
|
if chunk_idx is None:
|
||||||
# we just use the first archive's idx as starting point,
|
# we just use the first archive's idx as starting point,
|
||||||
# to avoid growing the hash table from 0 size and also
|
# to avoid growing the hash table from 0 size and also
|
||||||
|
@ -289,38 +301,28 @@ def create_master_idx(chunk_idx):
|
||||||
chunk_idx = archive_chunk_idx
|
chunk_idx = archive_chunk_idx
|
||||||
else:
|
else:
|
||||||
chunk_idx.merge(archive_chunk_idx)
|
chunk_idx.merge(archive_chunk_idx)
|
||||||
|
print('Done.')
|
||||||
return chunk_idx
|
return chunk_idx
|
||||||
|
|
||||||
def legacy_support():
|
def legacy_cleanup():
|
||||||
|
"""bring old cache dirs into the desired state (cleanup and adapt)"""
|
||||||
try:
|
try:
|
||||||
# get rid of the compressed tar file, if present
|
|
||||||
os.unlink(os.path.join(self.path, 'chunks.archive'))
|
os.unlink(os.path.join(self.path, 'chunks.archive'))
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
try:
|
try:
|
||||||
# create the directory for the archive index files we use now
|
os.unlink(os.path.join(self.path, 'chunks.archive.tmp'))
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
try:
|
||||||
os.mkdir(archive_path)
|
os.mkdir(archive_path)
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
self.begin_txn()
|
self.begin_txn()
|
||||||
print('Synchronizing chunks cache...')
|
|
||||||
repository = cache_if_remote(self.repository)
|
repository = cache_if_remote(self.repository)
|
||||||
legacy_support()
|
legacy_cleanup()
|
||||||
known_ids = set(unhexlify(hexid) for hexid in list_archives())
|
|
||||||
archive_ids = set(info[b'id'] for info in self.manifest.archives.values())
|
|
||||||
print('Rebuilding archive collection. Repo: %d Known: %d Outdated: %d Unknown: %d' % (
|
|
||||||
len(archive_ids), len(known_ids),
|
|
||||||
len(known_ids - archive_ids), len(archive_ids - known_ids), ))
|
|
||||||
cleanup_outdated(known_ids - archive_ids)
|
|
||||||
for archive_id in archive_ids - known_ids:
|
|
||||||
fetch_and_build_idx(archive_id, repository, self.key)
|
|
||||||
known_ids = set(unhexlify(hexid) for hexid in list_archives())
|
|
||||||
assert known_ids == archive_ids
|
|
||||||
print('Merging collection into master chunks cache...')
|
|
||||||
self.chunks = create_master_idx(self.chunks)
|
self.chunks = create_master_idx(self.chunks)
|
||||||
print('Done.')
|
|
||||||
|
|
||||||
def add_chunk(self, id, data, stats):
|
def add_chunk(self, id, data, stats):
|
||||||
if not self.txn_active:
|
if not self.txn_active:
|
||||||
|
|
Loading…
Reference in a new issue