From d067bc31784b6650135719f8914d6d2e540c2d2c Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 26 May 2015 02:04:41 +0200 Subject: [PATCH] efficient archive list from manifest a lot of speedup for: "list ", "delete " list, "prune" - esp. for slow connections to remote repositories. the previous method used metadata from the archive itself, which is (in total) rather large. so if you had many archives and a slow (remote) connection, it was very slow. but there is a much easier way: just use the archives list from the repository manifest - we already have it anyway and it also has name, id and timestamp for all archives - and that's all we need. I defined an ArchiveInfo namedtuple that has the same element names as the attribute names of the Archive object, so as long as name, id and ts are enough, it can be used in its place. --- borg/archive.py | 1 + borg/archiver.py | 13 ++++++------- borg/helpers.py | 13 +++++++++++++ 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/borg/archive.py b/borg/archive.py index a2cc59b99..3eccbbeef 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -494,6 +494,7 @@ class Archive: @staticmethod def list_archives(repository, key, manifest, cache=None): + # expensive! see also Manifest.list_archive_infos. 
for name, info in manifest.archives.items(): yield Archive(repository, key, manifest, name, cache=cache) diff --git a/borg/archiver.py b/borg/archiver.py index 79bf65f03..4b13e47fd 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -284,8 +284,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") stats.print_('Deleted data:', cache) else: print("You requested to completely DELETE the repository *including* all archives it contains:") - for archive in sorted(Archive.list_archives(repository, key, manifest), key=attrgetter('ts')): - print(format_archive(archive)) + for archive_info in manifest.list_archive_infos(sort_by='ts'): + print(format_archive(archive_info)) print("""Type "YES" if you understand this and want to continue.\n""") if input('Do you want to continue? ') == 'YES': repository.destroy() @@ -354,8 +354,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") item[b'group'] or item[b'gid'], size, format_time(mtime), remove_surrogates(item[b'path']), extra)) else: - for archive in sorted(Archive.list_archives(repository, key, manifest), key=attrgetter('ts')): - print(format_archive(archive)) + for archive_info in manifest.list_archive_infos(sort_by='ts'): + print(format_archive(archive_info)) return self.exit_code def do_info(self, args): @@ -380,8 +380,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") repository = self.open_repository(args.repository, exclusive=True) manifest, key = Manifest.load(repository) cache = Cache(repository, key, manifest, do_files=args.cache_files) - archives = list(sorted(Archive.list_archives(repository, key, manifest, cache), - key=attrgetter('ts'), reverse=True)) + archives = manifest.list_archive_infos(sort_by='ts', reverse=True) # just a ArchiveInfo list if args.hourly + args.daily + args.weekly + args.monthly + args.yearly == 0 and args.within is None: self.print_error('At least one of the "within", "hourly", "daily", "weekly", "monthly" or 
"yearly" ' 'settings must be specified') @@ -412,7 +411,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") self.print_verbose('Would prune: %s' % format_archive(archive)) else: self.print_verbose('Pruning archive: %s' % format_archive(archive)) - archive.delete(stats) + Archive(repository, key, manifest, archive.name, cache).delete(stats) if to_delete and not args.dry_run: manifest.write() repository.commit() diff --git a/borg/helpers.py b/borg/helpers.py index f96c1bf52..e97c88bf2 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -1,5 +1,6 @@ import argparse import binascii +from collections import namedtuple import grp import msgpack import os @@ -119,6 +120,18 @@ class Manifest: self.id = self.key.id_hash(data) self.repository.put(self.MANIFEST_ID, self.key.encrypt(data)) + def list_archive_infos(self, sort_by=None, reverse=False): + # inexpensive Archive.list_archives replacement if we just need .name, .id, .ts + ArchiveInfo = namedtuple('ArchiveInfo', 'name id ts') + archives = [] + for name, values in self.archives.items(): + ts = parse_timestamp(values[b'time'].decode('utf-8')) + id = values[b'id'] + archives.append(ArchiveInfo(name=name, id=id, ts=ts)) + if sort_by is not None: + archives = sorted(archives, key=attrgetter(sort_by), reverse=reverse) + return archives + def prune_within(archives, within): multiplier = {'H': 1, 'd': 24, 'w': 24*7, 'm': 24*31, 'y': 24*365}