From 1f056d9e8ab54066a9242cde7d89c83f2fd6c1dd Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 15 Aug 2016 04:17:41 +0200 Subject: [PATCH] more safe interface for manifest.archives --- src/borg/archive.py | 38 ++++++++++---------- src/borg/archiver.py | 8 ++--- src/borg/cache.py | 8 ++--- src/borg/fuse.py | 6 ++-- src/borg/helpers.py | 84 +++++++++++++++++++++++++++++++++++--------- 5 files changed, 99 insertions(+), 45 deletions(-) diff --git a/src/borg/archive.py b/src/borg/archive.py index 561d5ef80..09a3d3f70 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -269,10 +269,10 @@ class Archive: break i += 1 else: - if name not in self.manifest.archives: + info = self.manifest.archives.get(name) + if info is None: raise self.DoesNotExist(name) - info = self.manifest.archives[name] - self.load(info[b'id']) + self.load(info.id) self.zeros = b'\0' * (1 << chunker_params[1]) def _load_meta(self, id): @@ -379,7 +379,7 @@ Number of files: {0.stats.nfiles}'''.format( data = msgpack.packb(metadata.as_dict(), unicode_errors='surrogateescape') self.id = self.key.id_hash(data) self.cache.add_chunk(self.id, Chunk(data), self.stats) - self.manifest.archives[name] = {'id': self.id, 'time': metadata.time} + self.manifest.archives[name] = (self.id, metadata.time) self.manifest.write() self.repository.commit() self.cache.commit() @@ -593,7 +593,7 @@ Number of files: {0.stats.nfiles}'''.format( data = msgpack.packb(metadata.as_dict(), unicode_errors='surrogateescape') new_id = self.key.id_hash(data) self.cache.add_chunk(new_id, Chunk(data), self.stats) - self.manifest.archives[self.name] = {'id': new_id, 'time': metadata.time} + self.manifest.archives[self.name] = (new_id, metadata.time) self.cache.chunk_decref(self.id, self.stats) self.id = new_id @@ -844,7 +844,7 @@ Number of files: {0.stats.nfiles}'''.format( @staticmethod def list_archives(repository, key, manifest, cache=None): # expensive! see also Manifest.list_archive_infos. - for name, info in manifest.archives.items(): + for name in manifest.archives: yield Archive(repository, key, manifest, name, cache=cache) @staticmethod @@ -1077,7 +1077,7 @@ class ArchiveChecker: if valid_archive(archive): archive = ArchiveItem(internal_dict=archive) logger.info('Found archive %s', archive.name) - manifest.archives[archive.name] = {b'id': chunk_id, b'time': archive.time} + manifest.archives[archive.name] = (chunk_id, archive.time) logger.info('Manifest rebuild complete.') return manifest @@ -1216,28 +1216,30 @@ class ArchiveChecker: if archive is None: # we need last N or all archives - archive_items = sorted(self.manifest.archives.items(), reverse=True, - key=lambda name_info: name_info[1][b'time']) + archive_infos = self.manifest.archives.list(sort_by='ts', reverse=True) if prefix is not None: - archive_items = [item for item in archive_items if item[0].startswith(prefix)] - num_archives = len(archive_items) + archive_infos = [info for info in archive_infos if info.name.startswith(prefix)] + num_archives = len(archive_infos) end = None if last is None else min(num_archives, last) else: # we only want one specific archive - archive_items = [item for item in self.manifest.archives.items() if item[0] == archive] - if not archive_items: + info = self.manifest.archives.get(archive) + if info is None: logger.error("Archive '%s' not found.", archive) + archive_infos = [] + else: + archive_infos = [info] num_archives = 1 end = 1 with cache_if_remote(self.repository) as repository: - for i, (name, info) in enumerate(archive_items[:end]): - logger.info('Analyzing archive {} ({}/{})'.format(name, num_archives - i, num_archives)) - archive_id = info[b'id'] + for i, info in enumerate(archive_infos[:end]): + logger.info('Analyzing archive {} ({}/{})'.format(info.name, num_archives - i, num_archives)) + archive_id = info.id if archive_id not in self.chunks: logger.error('Archive metadata block is missing!') self.error_found = True - del self.manifest.archives[name] + del self.manifest.archives[info.name] continue mark_as_possibly_superseded(archive_id) cdata = self.repository.get(archive_id) @@ -1260,7 +1262,7 @@ class ArchiveChecker: new_archive_id = self.key.id_hash(data) cdata = self.key.encrypt(Chunk(data)) add_reference(new_archive_id, len(data), len(cdata), cdata) - info[b'id'] = new_archive_id + self.manifest.archives[info.name] = (new_archive_id, info.ts) def orphan_chunks_check(self): if self.check_all: diff --git a/src/borg/archiver.py b/src/borg/archiver.py index 4fc46e42d..78fdfe7a3 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -734,7 +734,7 @@ class Archiver: msg.append("This repository seems to have no manifest, so we can't tell anything about its contents.") else: msg.append("You requested to completely DELETE the repository *including* all archives it contains:") - for archive_info in manifest.list_archive_infos(sort_by='ts'): + for archive_info in manifest.archives.list(sort_by='ts'): msg.append(format_archive(archive_info)) msg.append("Type 'YES' if you understand this and want to continue: ") msg = '\n'.join(msg) @@ -812,7 +812,7 @@ class Archiver: format = "{archive:<36} {time} [{id}]{NL}" formatter = ArchiveFormatter(format) - for archive_info in manifest.list_archive_infos(sort_by='ts'): + for archive_info in manifest.archives.list(sort_by='ts'): if args.prefix and not archive_info.name.startswith(args.prefix): continue write(safe_encode(formatter.format_item(archive_info))) @@ -857,7 +857,7 @@ class Archiver: '"keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", ' '"keep-weekly", "keep-monthly" or "keep-yearly" settings must be specified.') return self.exit_code - archives_checkpoints = manifest.list_archive_infos(sort_by='ts', reverse=True) # just a ArchiveInfo list + archives_checkpoints = manifest.archives.list(sort_by='ts', reverse=True) # just a ArchiveInfo list if args.prefix: archives_checkpoints = [arch for arch in archives_checkpoints if arch.name.startswith(args.prefix)] is_checkpoint = re.compile(r'\.checkpoint(\.\d+)?$').search @@ -974,7 +974,7 @@ class Archiver: if args.target is not None: self.print_error('--target: Need to specify single archive') return self.exit_code - for archive in manifest.list_archive_infos(sort_by='ts'): + for archive in manifest.archives.list(sort_by='ts'): name = archive.name if recreater.is_temporary_archive(name): continue diff --git a/src/borg/cache.py b/src/borg/cache.py index 3f685a949..f325ff25d 100644 --- a/src/borg/cache.py +++ b/src/borg/cache.py @@ -279,7 +279,7 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}""" return set() def repo_archives(): - return set(info[b'id'] for info in self.manifest.archives.values()) + return set(info.id for info in self.manifest.archives.list()) def cleanup_outdated(ids): for id in ids: @@ -318,9 +318,9 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}""" return chunk_idx def lookup_name(archive_id): - for name, info in self.manifest.archives.items(): - if info[b'id'] == archive_id: - return name + for info in self.manifest.archives.list(): + if info.id == archive_id: + return info.name def create_master_idx(chunk_idx): logger.info('Synchronizing chunks cache...') diff --git a/src/borg/fuse.py b/src/borg/fuse.py index f5375924e..4e7cf10c5 100644 --- a/src/borg/fuse.py +++ b/src/borg/fuse.py @@ -73,11 +73,11 @@ class FuseOperations(llfuse.Operations): if archive: self.process_archive(archive) else: - for archive_name in manifest.archives: + for name in manifest.archives: # Create archive placeholder inode archive_inode = self._create_dir(parent=1) - self.contents[1][os.fsencode(archive_name)] = archive_inode - self.pending_archives[archive_inode] = Archive(repository, key, manifest, archive_name) + self.contents[1][os.fsencode(name)] = archive_inode + self.pending_archives[archive_inode] = Archive(repository, key, manifest, name) def mount(self, mountpoint, mount_options, foreground=False): """Mount filesystem on *mountpoint* with *mount_options*.""" diff --git a/src/borg/helpers.py b/src/borg/helpers.py index ef42ee57e..759291442 100644 --- a/src/borg/helpers.py +++ b/src/borg/helpers.py @@ -18,7 +18,7 @@ import time import unicodedata import uuid from binascii import hexlify -from collections import namedtuple, deque +from collections import namedtuple, deque, abc from contextlib import contextmanager from datetime import datetime, timezone, timedelta from fnmatch import translate @@ -97,12 +97,76 @@ def check_extension_modules(): raise ExtensionModuleError +ArchiveInfo = namedtuple('ArchiveInfo', 'name id ts') + + +class Archives(abc.MutableMapping): + """ + Nice wrapper around the archives dict, making sure only valid types/values get in + and we can deal with str keys (and it internally encodes to byte keys) and eiter + str timestamps or datetime timestamps. + """ + def __init__(self): + # key: encoded archive name, value: dict(b'id': bytes_id, b'time': bytes_iso_ts) + self._archives = {} + + def __len__(self): + return len(self._archives) + + def __iter__(self): + return iter(safe_decode(name) for name in self._archives) + + def __getitem__(self, name): + assert isinstance(name, str) + _name = safe_encode(name) + values = self._archives.get(_name) + if values is None: + raise KeyError + ts = parse_timestamp(values[b'time'].decode('utf-8')) + return ArchiveInfo(name=name, id=values[b'id'], ts=ts) + + def __setitem__(self, name, info): + assert isinstance(name, str) + name = safe_encode(name) + assert isinstance(info, tuple) + id, ts = info + assert isinstance(id, bytes) + if isinstance(ts, datetime): + ts = ts.replace(tzinfo=None).isoformat() + assert isinstance(ts, str) + ts = ts.encode() + self._archives[name] = {b'id': id, b'time': ts} + + def __delitem__(self, name): + assert isinstance(name, str) + name = safe_encode(name) + del self._archives[name] + + def list(self, sort_by=None, reverse=False): + # inexpensive Archive.list_archives replacement if we just need .name, .id, .ts + archives = self.values() # [self[name] for name in self] + if sort_by is not None: + archives = sorted(archives, key=attrgetter(sort_by), reverse=reverse) + return archives + + def set_raw_dict(self, d): + """set the dict we get from the msgpack unpacker""" + for k, v in d.items(): + assert isinstance(k, bytes) + assert isinstance(v, dict) and b'id' in v and b'time' in v + self._archives[k] = v + + def get_raw_dict(self): + """get the dict we can give to the msgpack packer""" + return self._archives + + class Manifest: MANIFEST_ID = b'\0' * 32 def __init__(self, key, repository, item_keys=None): - self.archives = {} + self.archives = Archives() self.config = {} self.key = key self.repository = repository @@ -129,7 +193,7 @@ class Manifest: m = ManifestItem(internal_dict=msgpack.unpackb(data)) if m.get('version') != 1: raise ValueError('Invalid manifest version') - manifest.archives = {safe_decode(k): v for k, v in m.archives.items()} + manifest.archives.set_raw_dict(m.archives) manifest.timestamp = m.get('timestamp') manifest.config = m.config # valid item keys are whatever is known in the repo or every key we know @@ -141,7 +205,7 @@ class Manifest: self.timestamp = datetime.utcnow().isoformat() manifest = ManifestItem( version=1, - archives=self.archives, + archives=self.archives.get_raw_dict(), timestamp=self.timestamp, config=self.config, item_keys=tuple(self.item_keys), @@ -150,18 +214,6 @@ class Manifest: self.id = self.key.id_hash(data) self.repository.put(self.MANIFEST_ID, self.key.encrypt(Chunk(data))) - def list_archive_infos(self, sort_by=None, reverse=False): - # inexpensive Archive.list_archives replacement if we just need .name, .id, .ts - ArchiveInfo = namedtuple('ArchiveInfo', 'name id ts') - archives = [] - for name, values in self.archives.items(): - ts = parse_timestamp(values[b'time'].decode('utf-8')) - id = values[b'id'] - archives.append(ArchiveInfo(name=name, id=id, ts=ts)) - if sort_by is not None: - archives = sorted(archives, key=attrgetter(sort_by), reverse=reverse) - return archives - def prune_within(archives, within): multiplier = {'H': 1, 'd': 24, 'w': 24 * 7, 'm': 24 * 31, 'y': 24 * 365}