efficient archive list from manifest

This gives a big speedup for:
"list <repo>", the archive listing shown by "delete <repo>", and "prune" - esp. for slow connections to remote repositories.

the previous method used metadata from the archive itself, which is (in total) rather large.
so if you had many archives and a slow (remote) connection, it was very slow.

but there is a much easier way: just use the archives list from the repository manifest - we already
have it anyway and it also has the name, id and timestamp for all archives - and that's all we need.

I defined an ArchiveInfo namedtuple whose field names match the corresponding attribute names
of the Archive object, so as long as name, id and ts are enough, it can be used in its place.
This commit is contained in:
Thomas Waldmann 2015-05-26 02:04:41 +02:00
parent 74409e4fcb
commit d067bc3178
3 changed files with 20 additions and 7 deletions

View File

@ -494,6 +494,7 @@ class Archive:
@staticmethod
def list_archives(repository, key, manifest, cache=None):
    """Lazily yield one Archive object per entry in ``manifest.archives``.

    Expensive! A full Archive is constructed for every archive name.
    See also Manifest.list_archive_infos.

    :param repository: passed through to the Archive constructor
    :param key: passed through to the Archive constructor
    :param manifest: object whose ``archives`` mapping supplies the names
    :param cache: optional, passed through as the ``cache`` keyword
    """
    # only the names are needed here; the per-archive manifest values are ignored
    yield from (
        Archive(repository, key, manifest, archive_name, cache=cache)
        for archive_name, _unused in manifest.archives.items()
    )

View File

@ -284,8 +284,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
stats.print_('Deleted data:', cache)
else:
print("You requested to completely DELETE the repository *including* all archives it contains:")
for archive in sorted(Archive.list_archives(repository, key, manifest), key=attrgetter('ts')):
print(format_archive(archive))
for archive_info in manifest.list_archive_infos(sort_by='ts'):
print(format_archive(archive_info))
print("""Type "YES" if you understand this and want to continue.\n""")
if input('Do you want to continue? ') == 'YES':
repository.destroy()
@ -354,8 +354,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
item[b'group'] or item[b'gid'], size, format_time(mtime),
remove_surrogates(item[b'path']), extra))
else:
for archive in sorted(Archive.list_archives(repository, key, manifest), key=attrgetter('ts')):
print(format_archive(archive))
for archive_info in manifest.list_archive_infos(sort_by='ts'):
print(format_archive(archive_info))
return self.exit_code
def do_info(self, args):
@ -380,8 +380,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
repository = self.open_repository(args.repository, exclusive=True)
manifest, key = Manifest.load(repository)
cache = Cache(repository, key, manifest, do_files=args.cache_files)
archives = list(sorted(Archive.list_archives(repository, key, manifest, cache),
key=attrgetter('ts'), reverse=True))
archives = manifest.list_archive_infos(sort_by='ts', reverse=True) # just a ArchiveInfo list
if args.hourly + args.daily + args.weekly + args.monthly + args.yearly == 0 and args.within is None:
self.print_error('At least one of the "within", "hourly", "daily", "weekly", "monthly" or "yearly" '
'settings must be specified')
@ -412,7 +411,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
self.print_verbose('Would prune: %s' % format_archive(archive))
else:
self.print_verbose('Pruning archive: %s' % format_archive(archive))
archive.delete(stats)
Archive(repository, key, manifest, archive.name, cache).delete(stats)
if to_delete and not args.dry_run:
manifest.write()
repository.commit()

View File

@ -1,5 +1,6 @@
import argparse
import binascii
from collections import namedtuple
import grp
import msgpack
import os
@ -119,6 +120,18 @@ class Manifest:
self.id = self.key.id_hash(data)
self.repository.put(self.MANIFEST_ID, self.key.encrypt(data))
def list_archive_infos(self, sort_by=None, reverse=False):
    """Return a list of ArchiveInfo namedtuples built from ``self.archives``.

    Inexpensive replacement for Archive.list_archives when only ``.name``,
    ``.id`` and ``.ts`` are needed.

    :param sort_by: name of the ArchiveInfo field to sort by, or None to
                    leave the list in the iteration order of ``self.archives``
    :param reverse: reverse the sort order (only used when sort_by is given)
    :return: list of ArchiveInfo(name, id, ts)
    """
    ArchiveInfo = namedtuple('ArchiveInfo', 'name id ts')
    # keyword arguments are evaluated left to right: the timestamp is parsed
    # before the id is looked up
    infos = [
        ArchiveInfo(
            ts=parse_timestamp(meta[b'time'].decode('utf-8')),
            id=meta[b'id'],
            name=archive_name,
        )
        for archive_name, meta in self.archives.items()
    ]
    if sort_by is None:
        return infos
    infos.sort(key=attrgetter(sort_by), reverse=reverse)
    return infos
def prune_within(archives, within):
multiplier = {'H': 1, 'd': 24, 'w': 24*7, 'm': 24*31, 'y': 24*365}