more safe interface for manifest.archives

This commit is contained in:
Thomas Waldmann 2016-08-15 04:17:41 +02:00
parent b6d0eb99a5
commit 1f056d9e8a
5 changed files with 99 additions and 45 deletions

View File

@ -269,10 +269,10 @@ class Archive:
break
i += 1
else:
if name not in self.manifest.archives:
info = self.manifest.archives.get(name)
if info is None:
raise self.DoesNotExist(name)
info = self.manifest.archives[name]
self.load(info[b'id'])
self.load(info.id)
self.zeros = b'\0' * (1 << chunker_params[1])
def _load_meta(self, id):
@ -379,7 +379,7 @@ Number of files: {0.stats.nfiles}'''.format(
data = msgpack.packb(metadata.as_dict(), unicode_errors='surrogateescape')
self.id = self.key.id_hash(data)
self.cache.add_chunk(self.id, Chunk(data), self.stats)
self.manifest.archives[name] = {'id': self.id, 'time': metadata.time}
self.manifest.archives[name] = (self.id, metadata.time)
self.manifest.write()
self.repository.commit()
self.cache.commit()
@ -593,7 +593,7 @@ Number of files: {0.stats.nfiles}'''.format(
data = msgpack.packb(metadata.as_dict(), unicode_errors='surrogateescape')
new_id = self.key.id_hash(data)
self.cache.add_chunk(new_id, Chunk(data), self.stats)
self.manifest.archives[self.name] = {'id': new_id, 'time': metadata.time}
self.manifest.archives[self.name] = (new_id, metadata.time)
self.cache.chunk_decref(self.id, self.stats)
self.id = new_id
@ -844,7 +844,7 @@ Number of files: {0.stats.nfiles}'''.format(
@staticmethod
def list_archives(repository, key, manifest, cache=None):
# expensive! see also Manifest.list_archive_infos.
for name, info in manifest.archives.items():
for name in manifest.archives:
yield Archive(repository, key, manifest, name, cache=cache)
@staticmethod
@ -1077,7 +1077,7 @@ class ArchiveChecker:
if valid_archive(archive):
archive = ArchiveItem(internal_dict=archive)
logger.info('Found archive %s', archive.name)
manifest.archives[archive.name] = {b'id': chunk_id, b'time': archive.time}
manifest.archives[archive.name] = (chunk_id, archive.time)
logger.info('Manifest rebuild complete.')
return manifest
@ -1216,28 +1216,30 @@ class ArchiveChecker:
if archive is None:
# we need last N or all archives
archive_items = sorted(self.manifest.archives.items(), reverse=True,
key=lambda name_info: name_info[1][b'time'])
archive_infos = self.manifest.archives.list(sort_by='ts', reverse=True)
if prefix is not None:
archive_items = [item for item in archive_items if item[0].startswith(prefix)]
num_archives = len(archive_items)
archive_infos = [info for info in archive_infos if info.name.startswith(prefix)]
num_archives = len(archive_infos)
end = None if last is None else min(num_archives, last)
else:
# we only want one specific archive
archive_items = [item for item in self.manifest.archives.items() if item[0] == archive]
if not archive_items:
info = self.manifest.archives.get(archive)
if info is None:
logger.error("Archive '%s' not found.", archive)
archive_infos = []
else:
archive_infos = [info]
num_archives = 1
end = 1
with cache_if_remote(self.repository) as repository:
for i, (name, info) in enumerate(archive_items[:end]):
logger.info('Analyzing archive {} ({}/{})'.format(name, num_archives - i, num_archives))
archive_id = info[b'id']
for i, info in enumerate(archive_infos[:end]):
logger.info('Analyzing archive {} ({}/{})'.format(info.name, num_archives - i, num_archives))
archive_id = info.id
if archive_id not in self.chunks:
logger.error('Archive metadata block is missing!')
self.error_found = True
del self.manifest.archives[name]
del self.manifest.archives[info.name]
continue
mark_as_possibly_superseded(archive_id)
cdata = self.repository.get(archive_id)
@ -1260,7 +1262,7 @@ class ArchiveChecker:
new_archive_id = self.key.id_hash(data)
cdata = self.key.encrypt(Chunk(data))
add_reference(new_archive_id, len(data), len(cdata), cdata)
info[b'id'] = new_archive_id
self.manifest.archives[info.name] = (new_archive_id, info.ts)
def orphan_chunks_check(self):
if self.check_all:

View File

@ -734,7 +734,7 @@ class Archiver:
msg.append("This repository seems to have no manifest, so we can't tell anything about its contents.")
else:
msg.append("You requested to completely DELETE the repository *including* all archives it contains:")
for archive_info in manifest.list_archive_infos(sort_by='ts'):
for archive_info in manifest.archives.list(sort_by='ts'):
msg.append(format_archive(archive_info))
msg.append("Type 'YES' if you understand this and want to continue: ")
msg = '\n'.join(msg)
@ -812,7 +812,7 @@ class Archiver:
format = "{archive:<36} {time} [{id}]{NL}"
formatter = ArchiveFormatter(format)
for archive_info in manifest.list_archive_infos(sort_by='ts'):
for archive_info in manifest.archives.list(sort_by='ts'):
if args.prefix and not archive_info.name.startswith(args.prefix):
continue
write(safe_encode(formatter.format_item(archive_info)))
@ -857,7 +857,7 @@ class Archiver:
'"keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", '
'"keep-weekly", "keep-monthly" or "keep-yearly" settings must be specified.')
return self.exit_code
archives_checkpoints = manifest.list_archive_infos(sort_by='ts', reverse=True) # just a ArchiveInfo list
archives_checkpoints = manifest.archives.list(sort_by='ts', reverse=True) # just a ArchiveInfo list
if args.prefix:
archives_checkpoints = [arch for arch in archives_checkpoints if arch.name.startswith(args.prefix)]
is_checkpoint = re.compile(r'\.checkpoint(\.\d+)?$').search
@ -974,7 +974,7 @@ class Archiver:
if args.target is not None:
self.print_error('--target: Need to specify single archive')
return self.exit_code
for archive in manifest.list_archive_infos(sort_by='ts'):
for archive in manifest.archives.list(sort_by='ts'):
name = archive.name
if recreater.is_temporary_archive(name):
continue

View File

@ -279,7 +279,7 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
return set()
def repo_archives():
return set(info[b'id'] for info in self.manifest.archives.values())
return set(info.id for info in self.manifest.archives.list())
def cleanup_outdated(ids):
for id in ids:
@ -318,9 +318,9 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
return chunk_idx
def lookup_name(archive_id):
for name, info in self.manifest.archives.items():
if info[b'id'] == archive_id:
return name
for info in self.manifest.archives.list():
if info.id == archive_id:
return info.name
def create_master_idx(chunk_idx):
logger.info('Synchronizing chunks cache...')

View File

@ -73,11 +73,11 @@ class FuseOperations(llfuse.Operations):
if archive:
self.process_archive(archive)
else:
for archive_name in manifest.archives:
for name in manifest.archives:
# Create archive placeholder inode
archive_inode = self._create_dir(parent=1)
self.contents[1][os.fsencode(archive_name)] = archive_inode
self.pending_archives[archive_inode] = Archive(repository, key, manifest, archive_name)
self.contents[1][os.fsencode(name)] = archive_inode
self.pending_archives[archive_inode] = Archive(repository, key, manifest, name)
def mount(self, mountpoint, mount_options, foreground=False):
"""Mount filesystem on *mountpoint* with *mount_options*."""

View File

@ -18,7 +18,7 @@ import time
import unicodedata
import uuid
from binascii import hexlify
from collections import namedtuple, deque
from collections import namedtuple, deque, abc
from contextlib import contextmanager
from datetime import datetime, timezone, timedelta
from fnmatch import translate
@ -97,12 +97,76 @@ def check_extension_modules():
raise ExtensionModuleError
ArchiveInfo = namedtuple('ArchiveInfo', 'name id ts')
class Archives(abc.MutableMapping):
"""
Nice wrapper around the archives dict, making sure only valid types/values get in
and we can deal with str keys (and it internally encodes to byte keys) and eiter
str timestamps or datetime timestamps.
"""
def __init__(self):
# key: encoded archive name, value: dict(b'id': bytes_id, b'time': bytes_iso_ts)
self._archives = {}
def __len__(self):
return len(self._archives)
def __iter__(self):
return iter(safe_decode(name) for name in self._archives)
def __getitem__(self, name):
assert isinstance(name, str)
_name = safe_encode(name)
values = self._archives.get(_name)
if values is None:
raise KeyError
ts = parse_timestamp(values[b'time'].decode('utf-8'))
return ArchiveInfo(name=name, id=values[b'id'], ts=ts)
def __setitem__(self, name, info):
assert isinstance(name, str)
name = safe_encode(name)
assert isinstance(info, tuple)
id, ts = info
assert isinstance(id, bytes)
if isinstance(ts, datetime):
ts = ts.replace(tzinfo=None).isoformat()
assert isinstance(ts, str)
ts = ts.encode()
self._archives[name] = {b'id': id, b'time': ts}
def __delitem__(self, name):
assert isinstance(name, str)
name = safe_encode(name)
del self._archives[name]
def list(self, sort_by=None, reverse=False):
# inexpensive Archive.list_archives replacement if we just need .name, .id, .ts
archives = self.values() # [self[name] for name in self]
if sort_by is not None:
archives = sorted(archives, key=attrgetter(sort_by), reverse=reverse)
return archives
def set_raw_dict(self, d):
"""set the dict we get from the msgpack unpacker"""
for k, v in d.items():
assert isinstance(k, bytes)
assert isinstance(v, dict) and b'id' in v and b'time' in v
self._archives[k] = v
def get_raw_dict(self):
"""get the dict we can give to the msgpack packer"""
return self._archives
class Manifest:
MANIFEST_ID = b'\0' * 32
def __init__(self, key, repository, item_keys=None):
self.archives = {}
self.archives = Archives()
self.config = {}
self.key = key
self.repository = repository
@ -129,7 +193,7 @@ class Manifest:
m = ManifestItem(internal_dict=msgpack.unpackb(data))
if m.get('version') != 1:
raise ValueError('Invalid manifest version')
manifest.archives = {safe_decode(k): v for k, v in m.archives.items()}
manifest.archives.set_raw_dict(m.archives)
manifest.timestamp = m.get('timestamp')
manifest.config = m.config
# valid item keys are whatever is known in the repo or every key we know
@ -141,7 +205,7 @@ class Manifest:
self.timestamp = datetime.utcnow().isoformat()
manifest = ManifestItem(
version=1,
archives=self.archives,
archives=self.archives.get_raw_dict(),
timestamp=self.timestamp,
config=self.config,
item_keys=tuple(self.item_keys),
@ -150,18 +214,6 @@ class Manifest:
self.id = self.key.id_hash(data)
self.repository.put(self.MANIFEST_ID, self.key.encrypt(Chunk(data)))
def list_archive_infos(self, sort_by=None, reverse=False):
# inexpensive Archive.list_archives replacement if we just need .name, .id, .ts
ArchiveInfo = namedtuple('ArchiveInfo', 'name id ts')
archives = []
for name, values in self.archives.items():
ts = parse_timestamp(values[b'time'].decode('utf-8'))
id = values[b'id']
archives.append(ArchiveInfo(name=name, id=id, ts=ts))
if sort_by is not None:
archives = sorted(archives, key=attrgetter(sort_by), reverse=reverse)
return archives
def prune_within(archives, within):
multiplier = {'H': 1, 'd': 24, 'w': 24 * 7, 'm': 24 * 31, 'y': 24 * 365}