From c8922c8b3dfe79a56ae99587647b2cfc99bf8233 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 15 Aug 2016 01:11:33 +0200 Subject: [PATCH] use ArchiveItem --- src/borg/archive.py | 70 +++++++++++++++++----------------- src/borg/archiver.py | 14 +++---- src/borg/cache.py | 9 ++--- src/borg/constants.py | 2 - src/borg/fuse.py | 2 +- src/borg/item.py | 4 +- src/borg/testsuite/archive.py | 4 +- src/borg/testsuite/archiver.py | 2 +- 8 files changed, 53 insertions(+), 54 deletions(-) diff --git a/src/borg/archive.py b/src/borg/archive.py index 7a4d7e877..561d5ef80 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -35,7 +35,7 @@ from .helpers import ProgressIndicatorPercent, log_multi from .helpers import PathPrefixPattern, FnmatchPattern from .helpers import consume from .helpers import CompressionDecider1, CompressionDecider2, CompressionSpec -from .item import Item +from .item import Item, ArchiveItem from .key import key_factory from .platform import acl_get, acl_set, set_flags, get_flags, swidth from .remote import cache_if_remote @@ -277,29 +277,28 @@ class Archive: def _load_meta(self, id): _, data = self.key.decrypt(id, self.repository.get(id)) - metadata = msgpack.unpackb(data) - if metadata[b'version'] != 1: + metadata = ArchiveItem(internal_dict=msgpack.unpackb(data)) + if metadata.version != 1: raise Exception('Unknown archive metadata version') return metadata def load(self, id): self.id = id self.metadata = self._load_meta(self.id) - decode_dict(self.metadata, ARCHIVE_TEXT_KEYS) - self.metadata[b'cmdline'] = [safe_decode(arg) for arg in self.metadata[b'cmdline']] - self.name = self.metadata[b'name'] + self.metadata.cmdline = [safe_decode(arg) for arg in self.metadata.cmdline] + self.name = self.metadata.name @property def ts(self): """Timestamp of archive creation (start) in UTC""" - ts = self.metadata[b'time'] + ts = self.metadata.time return parse_timestamp(ts) @property def ts_end(self): """Timestamp of archive creation (end) in UTC""" # fall back to time if there is no time_end present in metadata - ts = self.metadata.get(b'time_end') or self.metadata[b'time'] + ts = self.metadata.get('time_end') or self.metadata.time return parse_timestamp(ts) @property @@ -336,7 +335,7 @@ Number of files: {0.stats.nfiles}'''.format( return filter(item) if filter else True def iter_items(self, filter=None, preload=False): - for item in self.pipeline.unpack_many(self.metadata[b'items'], preload=preload, + for item in self.pipeline.unpack_many(self.metadata.items, preload=preload, filter=lambda item: self.item_filter(item, filter)): yield item @@ -366,7 +365,7 @@ Number of files: {0.stats.nfiles}'''.format( metadata = { 'version': 1, 'name': name, - 'comment': comment, + 'comment': comment or '', 'items': self.items_buffer.chunks, 'cmdline': sys.argv, 'hostname': socket.gethostname(), @@ -376,10 +375,11 @@ Number of files: {0.stats.nfiles}'''.format( 'chunker_params': self.chunker_params, } metadata.update(additional_metadata or {}) - data = msgpack.packb(StableDict(metadata), unicode_errors='surrogateescape') + metadata = ArchiveItem(metadata) + data = msgpack.packb(metadata.as_dict(), unicode_errors='surrogateescape') self.id = self.key.id_hash(data) self.cache.add_chunk(self.id, Chunk(data), self.stats) - self.manifest.archives[name] = {'id': self.id, 'time': metadata['time']} + self.manifest.archives[name] = {'id': self.id, 'time': metadata.time} self.manifest.write() self.repository.commit() self.cache.commit() @@ -400,7 +400,7 @@ Number of files: {0.stats.nfiles}'''.format( cache.begin_txn() stats = Statistics() add(self.id) - for id, chunk in zip(self.metadata[b'items'], self.repository.get_many(self.metadata[b'items'])): + for id, chunk in zip(self.metadata.items, self.repository.get_many(self.metadata.items)): add(id) _, data = self.key.decrypt(id, chunk) unpacker.feed(data) @@ -588,12 +588,12 @@ Number of files: {0.stats.nfiles}'''.format( raise def set_meta(self, key, value): - metadata = StableDict(self._load_meta(self.id)) - metadata[key] = value - data = msgpack.packb(metadata, unicode_errors='surrogateescape') + metadata = self._load_meta(self.id) + setattr(metadata, key, value) + data = msgpack.packb(metadata.as_dict(), unicode_errors='surrogateescape') new_id = self.key.id_hash(data) self.cache.add_chunk(new_id, Chunk(data), self.stats) - self.manifest.archives[self.name] = {'id': new_id, 'time': metadata[b'time']} + self.manifest.archives[self.name] = {'id': new_id, 'time': metadata.time} self.cache.chunk_decref(self.id, self.stats) self.id = new_id @@ -602,7 +602,7 @@ Number of files: {0.stats.nfiles}'''.format( raise self.AlreadyExists(name) oldname = self.name self.name = name - self.set_meta(b'name', name) + self.set_meta('name', name) del self.manifest.archives[oldname] def delete(self, stats, progress=False, forced=False): @@ -625,7 +625,7 @@ Number of files: {0.stats.nfiles}'''.format( error = False try: unpacker = msgpack.Unpacker(use_list=False) - items_ids = self.metadata[b'items'] + items_ids = self.metadata.items pi = ProgressIndicatorPercent(total=len(items_ids), msg="Decrementing references %3.0f%%", same_line=True) for (i, (items_id, data)) in enumerate(zip(items_ids, self.repository.get_many(items_ids))): if progress: @@ -1075,8 +1075,9 @@ class ArchiveChecker: except (TypeError, ValueError, StopIteration): continue if valid_archive(archive): - logger.info('Found archive %s', archive[b'name'].decode('utf-8')) - manifest.archives[archive[b'name'].decode('utf-8')] = {b'id': chunk_id, b'time': archive[b'time']} + archive = ArchiveItem(internal_dict=archive) + logger.info('Found archive %s', archive.name) + manifest.archives[archive.name] = {b'id': chunk_id, b'time': archive.time} logger.info('Manifest rebuild complete.') return manifest @@ -1187,7 +1188,7 @@ class ArchiveChecker: return required_item_keys.issubset(keys) and keys.issubset(item_keys) i = 0 - for state, items in groupby(archive[b'items'], missing_chunk_detector): + for state, items in groupby(archive.items, missing_chunk_detector): items = list(items) if state % 2: for chunk_id in items: @@ -1241,11 +1242,10 @@ class ArchiveChecker: mark_as_possibly_superseded(archive_id) cdata = self.repository.get(archive_id) _, data = self.key.decrypt(archive_id, cdata) - archive = StableDict(msgpack.unpackb(data)) - if archive[b'version'] != 1: + archive = ArchiveItem(internal_dict=msgpack.unpackb(data)) + if archive.version != 1: raise Exception('Unknown archive metadata version') - decode_dict(archive, ARCHIVE_TEXT_KEYS) - archive[b'cmdline'] = [safe_decode(arg) for arg in archive[b'cmdline']] + archive.cmdline = [safe_decode(arg) for arg in archive.cmdline] items_buffer = ChunkBuffer(self.key) items_buffer.write_chunk = add_callback for item in robust_iterator(archive): @@ -1253,10 +1253,10 @@ class ArchiveChecker: verify_file_chunks(item) items_buffer.add(item) items_buffer.flush(flush=True) - for previous_item_id in archive[b'items']: + for previous_item_id in archive.items: mark_as_possibly_superseded(previous_item_id) - archive[b'items'] = items_buffer.chunks - data = msgpack.packb(archive, unicode_errors='surrogateescape') + archive.items = items_buffer.chunks + data = msgpack.packb(archive.as_dict(), unicode_errors='surrogateescape') new_archive_id = self.key.id_hash(data) cdata = self.key.encrypt(Chunk(data)) add_reference(new_archive_id, len(data), len(cdata), cdata) @@ -1483,9 +1483,9 @@ class ArchiveRecreater: if completed: timestamp = archive.ts.replace(tzinfo=None) if comment is None: - comment = archive.metadata.get(b'comment', '') + comment = archive.metadata.get('comment', '') target.save(timestamp=timestamp, comment=comment, additional_metadata={ - 'cmdline': archive.metadata[b'cmdline'], + 'cmdline': archive.metadata.cmdline, 'recreate_cmdline': sys.argv, }) if replace_original: @@ -1554,7 +1554,7 @@ class ArchiveRecreater: if not target: target = self.create_target_archive(target_name) # If the archives use the same chunker params, then don't rechunkify - target.recreate_rechunkify = tuple(archive.metadata.get(b'chunker_params')) != self.chunker_params + target.recreate_rechunkify = tuple(archive.metadata.get('chunker_params')) != self.chunker_params return target, resume_from def try_resume(self, archive, target_name): @@ -1573,7 +1573,7 @@ class ArchiveRecreater: return target, resume_from def incref_partial_chunks(self, source_archive, target_archive): - target_archive.recreate_partial_chunks = source_archive.metadata.get(b'recreate_partial_chunks', []) + target_archive.recreate_partial_chunks = source_archive.metadata.get('recreate_partial_chunks', []) for chunk_id, size, csize in target_archive.recreate_partial_chunks: if not self.cache.seen_chunk(chunk_id): try: @@ -1606,8 +1606,8 @@ class ArchiveRecreater: return item def can_resume(self, archive, old_target, target_name): - resume_id = old_target.metadata[b'recreate_source_id'] - resume_args = [safe_decode(arg) for arg in old_target.metadata[b'recreate_args']] + resume_id = old_target.metadata.recreate_source_id + resume_args = [safe_decode(arg) for arg in old_target.metadata.recreate_args] if resume_id != archive.id: logger.warning('Source archive changed, will discard %s and start over', target_name) logger.warning('Saved fingerprint: %s', bin_to_hex(resume_id)) diff --git a/src/borg/archiver.py b/src/borg/archiver.py index 98a02df68..4fc46e42d 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -679,8 +679,8 @@ class Archiver: archive2 = Archive(repository, key, manifest, args.archive2, consider_part_files=args.consider_part_files) - can_compare_chunk_ids = archive1.metadata.get(b'chunker_params', False) == archive2.metadata.get( - b'chunker_params', True) or args.same_chunker_params + can_compare_chunk_ids = archive1.metadata.get('chunker_params', False) == archive2.metadata.get( + 'chunker_params', True) or args.same_chunker_params if not can_compare_chunk_ids: self.print_warning('--chunker-params might be different between archives, diff will be slow.\n' 'If you know for certain that they are the same, pass --same-chunker-params ' @@ -831,14 +831,14 @@ class Archiver: stats = archive.calc_stats(cache) print('Archive name: %s' % archive.name) print('Archive fingerprint: %s' % archive.fpr) - print('Comment: %s' % archive.metadata.get(b'comment', '')) - print('Hostname: %s' % archive.metadata[b'hostname']) - print('Username: %s' % archive.metadata[b'username']) + print('Comment: %s' % archive.metadata.get('comment', '')) + print('Hostname: %s' % archive.metadata.hostname) + print('Username: %s' % archive.metadata.username) print('Time (start): %s' % format_time(to_localtime(archive.ts))) print('Time (end): %s' % format_time(to_localtime(archive.ts_end))) print('Duration: %s' % archive.duration_from_meta) print('Number of files: %d' % stats.nfiles) - print('Command line: %s' % format_cmdline(archive.metadata[b'cmdline'])) + print('Command line: %s' % format_cmdline(archive.metadata.cmdline)) print(DASHES) print(STATS_HEADER) print(str(stats)) @@ -1009,7 +1009,7 @@ class Archiver: """dump (decrypted, decompressed) archive items metadata (not: data)""" archive = Archive(repository, key, manifest, args.location.archive, consider_part_files=args.consider_part_files) - for i, item_id in enumerate(archive.metadata[b'items']): + for i, item_id in enumerate(archive.metadata.items): _, data = key.decrypt(item_id, repository.get(item_id)) filename = '%06d_%s.items' % (i, bin_to_hex(item_id)) print('Dumping', filename) diff --git a/src/borg/cache.py b/src/borg/cache.py index df4b90861..3f685a949 100644 --- a/src/borg/cache.py +++ b/src/borg/cache.py @@ -16,7 +16,7 @@ from .helpers import get_cache_dir from .helpers import decode_dict, int_to_bigint, bigint_to_int, bin_to_hex from .helpers import format_file_size from .helpers import yes -from .item import Item +from .item import Item, ArchiveItem from .key import PlaintextKey from .locking import Lock from .platform import SaveFile @@ -290,12 +290,11 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}""" cdata = repository.get(archive_id) _, data = key.decrypt(archive_id, cdata) chunk_idx.add(archive_id, 1, len(data), len(cdata)) - archive = msgpack.unpackb(data) - if archive[b'version'] != 1: + archive = ArchiveItem(internal_dict=msgpack.unpackb(data)) + if archive.version != 1: raise Exception('Unknown archive metadata version') - decode_dict(archive, (b'name',)) unpacker = msgpack.Unpacker() - for item_id, chunk in zip(archive[b'items'], repository.get_many(archive[b'items'])): + for item_id, chunk in zip(archive.items, repository.get_many(archive.items)): _, data = key.decrypt(item_id, chunk) chunk_idx.add(item_id, 1, len(data), len(chunk)) unpacker.feed(data) diff --git a/src/borg/constants.py b/src/borg/constants.py index d83c41f20..d6f26d116 100644 --- a/src/borg/constants.py +++ b/src/borg/constants.py @@ -15,8 +15,6 @@ ARCHIVE_KEYS = frozenset(['version', 'name', 'items', 'cmdline', 'hostname', 'us # this is the set of keys that are always present in archives: REQUIRED_ARCHIVE_KEYS = frozenset(['version', 'name', 'items', 'cmdline', 'time', ]) -ARCHIVE_TEXT_KEYS = (b'name', b'comment', b'hostname', b'username', b'time', b'time_end') - # default umask, overriden by --umask, defaults to read/write only for owner UMASK_DEFAULT = 0o077 diff --git a/src/borg/fuse.py b/src/borg/fuse.py index 3113515fe..f5375924e 100644 --- a/src/borg/fuse.py +++ b/src/borg/fuse.py @@ -117,7 +117,7 @@ class FuseOperations(llfuse.Operations): """Build fuse inode hierarchy from archive metadata """ unpacker = msgpack.Unpacker() - for key, chunk in zip(archive.metadata[b'items'], self.repository.get_many(archive.metadata[b'items'])): + for key, chunk in zip(archive.metadata.items, self.repository.get_many(archive.metadata.items)): _, data = self.key.decrypt(key, chunk) unpacker.feed(data) for item in unpacker: diff --git a/src/borg/item.py b/src/borg/item.py index c062f09ca..93999e20e 100644 --- a/src/borg/item.py +++ b/src/borg/item.py @@ -220,7 +220,8 @@ class ArchiveItem(PropDict): VALID_KEYS = {'version', 'name', 'items', 'cmdline', 'hostname', 'username', 'time', 'time_end', 'comment', 'chunker_params', - 'recreate_cmdline', 'recreate_source_id', 'recreate_args'} # str-typed keys + 'recreate_cmdline', 'recreate_source_id', 'recreate_args', 'recreate_partial_chunks', + } # str-typed keys __slots__ = ("_dict", ) # avoid setting attributes not supported by properties @@ -237,3 +238,4 @@ class ArchiveItem(PropDict): recreate_source_id = PropDict._make_property('recreate_source_id', bytes) recreate_cmdline = PropDict._make_property('recreate_cmdline', list) # list of s-e-str recreate_args = PropDict._make_property('recreate_args', list) # list of s-e-str + recreate_partial_chunks = PropDict._make_property('recreate_partial_chunks', list) # list of tuples diff --git a/src/borg/testsuite/archive.py b/src/borg/testsuite/archive.py index 19db1a44c..49648ef47 100644 --- a/src/borg/testsuite/archive.py +++ b/src/borg/testsuite/archive.py @@ -8,7 +8,7 @@ import msgpack from ..archive import Archive, CacheChunkBuffer, RobustUnpacker, valid_msgpacked_dict, ITEM_KEYS, Statistics from ..archive import BackupOSError, backup_io, backup_io_iter -from ..item import Item +from ..item import Item, ArchiveItem from ..key import PlaintextKey from ..helpers import Manifest from . import BaseTestCase @@ -77,7 +77,7 @@ class ArchiveTimestampTestCase(BaseTestCase): key = PlaintextKey(repository) manifest = Manifest(repository, key) a = Archive(repository, key, manifest, 'test', create=True) - a.metadata = {b'time': isoformat} + a.metadata = ArchiveItem(time=isoformat) self.assert_equal(a.ts, expected) def test_with_microseconds(self): diff --git a/src/borg/testsuite/archiver.py b/src/borg/testsuite/archiver.py index 2df01b29e..1901b8d49 100644 --- a/src/borg/testsuite/archiver.py +++ b/src/borg/testsuite/archiver.py @@ -1859,7 +1859,7 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase): def test_missing_archive_item_chunk(self): archive, repository = self.open_archive('archive1') with repository: - repository.delete(archive.metadata[b'items'][-5]) + repository.delete(archive.metadata.items[-5]) repository.commit() self.cmd('check', self.repository_location, exit_code=1) self.cmd('check', '--repair', self.repository_location, exit_code=0)