Merge pull request #2217 from ThomasWaldmann/bench-cmd

borg benchmark crud command
This commit is contained in:
TW 2017-02-28 21:49:04 +01:00 committed by GitHub
commit 0db058345b
2 changed files with 196 additions and 1 deletions

View File

@ -0,0 +1,64 @@
borg benchmark crud
===================
Here is some example output of borg benchmark crud.
I ran it on my laptop: Core i5-4200u, 8GB RAM, SATA SSD, Linux, ext4 fs.
"src" as well as the repo are local, on this SSD.
$ BORG_PASSPHRASE=secret borg init --encryption repokey-blake2 repo
$ BORG_PASSPHRASE=secret borg benchmark crud repo src
C-Z-BIG 116.06 MB/s (10 * 100.00 MB all-zero files: 8.62s)
R-Z-BIG 197.00 MB/s (10 * 100.00 MB all-zero files: 5.08s)
U-Z-BIG 418.07 MB/s (10 * 100.00 MB all-zero files: 2.39s)
D-Z-BIG 724.94 MB/s (10 * 100.00 MB all-zero files: 1.38s)
C-R-BIG 42.21 MB/s (10 * 100.00 MB random files: 23.69s)
R-R-BIG 134.45 MB/s (10 * 100.00 MB random files: 7.44s)
U-R-BIG 316.83 MB/s (10 * 100.00 MB random files: 3.16s)
D-R-BIG 251.10 MB/s (10 * 100.00 MB random files: 3.98s)
C-Z-MEDIUM 118.53 MB/s (1000 * 1.00 MB all-zero files: 8.44s)
R-Z-MEDIUM 218.49 MB/s (1000 * 1.00 MB all-zero files: 4.58s)
U-Z-MEDIUM 591.59 MB/s (1000 * 1.00 MB all-zero files: 1.69s)
D-Z-MEDIUM 730.04 MB/s (1000 * 1.00 MB all-zero files: 1.37s)
C-R-MEDIUM 31.46 MB/s (1000 * 1.00 MB random files: 31.79s)
R-R-MEDIUM 129.64 MB/s (1000 * 1.00 MB random files: 7.71s)
U-R-MEDIUM 621.86 MB/s (1000 * 1.00 MB random files: 1.61s)
D-R-MEDIUM 234.82 MB/s (1000 * 1.00 MB random files: 4.26s)
C-Z-SMALL 19.81 MB/s (10000 * 10.00 kB all-zero files: 5.05s)
R-Z-SMALL 97.69 MB/s (10000 * 10.00 kB all-zero files: 1.02s)
U-Z-SMALL 36.35 MB/s (10000 * 10.00 kB all-zero files: 2.75s)
D-Z-SMALL 57.04 MB/s (10000 * 10.00 kB all-zero files: 1.75s)
C-R-SMALL 9.81 MB/s (10000 * 10.00 kB random files: 10.19s)
R-R-SMALL 92.21 MB/s (10000 * 10.00 kB random files: 1.08s)
U-R-SMALL 64.62 MB/s (10000 * 10.00 kB random files: 1.55s)
D-R-SMALL 51.62 MB/s (10000 * 10.00 kB random files: 1.94s)
A second run some time later gave:
C-Z-BIG 115.22 MB/s (10 * 100.00 MB all-zero files: 8.68s)
R-Z-BIG 196.06 MB/s (10 * 100.00 MB all-zero files: 5.10s)
U-Z-BIG 439.50 MB/s (10 * 100.00 MB all-zero files: 2.28s)
D-Z-BIG 671.11 MB/s (10 * 100.00 MB all-zero files: 1.49s)
C-R-BIG 43.40 MB/s (10 * 100.00 MB random files: 23.04s)
R-R-BIG 133.17 MB/s (10 * 100.00 MB random files: 7.51s)
U-R-BIG 464.50 MB/s (10 * 100.00 MB random files: 2.15s)
D-R-BIG 245.19 MB/s (10 * 100.00 MB random files: 4.08s)
C-Z-MEDIUM 110.82 MB/s (1000 * 1.00 MB all-zero files: 9.02s)
R-Z-MEDIUM 217.96 MB/s (1000 * 1.00 MB all-zero files: 4.59s)
U-Z-MEDIUM 601.54 MB/s (1000 * 1.00 MB all-zero files: 1.66s)
D-Z-MEDIUM 686.99 MB/s (1000 * 1.00 MB all-zero files: 1.46s)
C-R-MEDIUM 39.91 MB/s (1000 * 1.00 MB random files: 25.06s)
R-R-MEDIUM 128.91 MB/s (1000 * 1.00 MB random files: 7.76s)
U-R-MEDIUM 599.00 MB/s (1000 * 1.00 MB random files: 1.67s)
D-R-MEDIUM 230.69 MB/s (1000 * 1.00 MB random files: 4.33s)
C-Z-SMALL 14.78 MB/s (10000 * 10.00 kB all-zero files: 6.76s)
R-Z-SMALL 96.86 MB/s (10000 * 10.00 kB all-zero files: 1.03s)
U-Z-SMALL 35.22 MB/s (10000 * 10.00 kB all-zero files: 2.84s)
D-Z-SMALL 64.93 MB/s (10000 * 10.00 kB all-zero files: 1.54s)
C-R-SMALL 11.08 MB/s (10000 * 10.00 kB random files: 9.02s)
R-R-SMALL 92.34 MB/s (10000 * 10.00 kB random files: 1.08s)
U-R-SMALL 64.49 MB/s (10000 * 10.00 kB random files: 1.55s)
D-R-SMALL 46.96 MB/s (10000 * 10.00 kB random files: 2.13s)

View File

@ -9,6 +9,7 @@ import logging
import os
import re
import shlex
import shutil
import signal
import stat
import subprocess
@ -17,6 +18,7 @@ import textwrap
import time
import traceback
from binascii import unhexlify
from contextlib import contextmanager
from datetime import datetime, timedelta
from itertools import zip_longest
@ -57,7 +59,7 @@ from .helpers import basic_json_data, json_print
from .item import Item
from .key import key_creator, tam_required_file, tam_required, RepoKey, PassphraseKey
from .keymanager import KeyManager
from .platform import get_flags, umount, get_process_id
from .platform import get_flags, umount, get_process_id, SyncFile
from .remote import RepositoryServer, RemoteRepository, cache_if_remote
from .repository import Repository, LIST_SCAN_LIMIT
from .selftest import selftest
@ -322,6 +324,72 @@ class Archiver:
logger.info('Key updated')
return EXIT_SUCCESS
def do_benchmark_crud(self, args):
    """Benchmark Create, Read, Update, Delete for archives."""
    # NOTE: keep the docstring above a one-liner - the argument parser uses
    # do_benchmark_crud.__doc__ verbatim as the description of this command.
    #
    # args.location is the repository to benchmark against, args.path is the
    # directory below which the temporary input data gets created.
    # One result line per operation (C, R, U, D) and data set is printed.

    def measurement_run(repo, path):
        """Time the four CRUD operations for the input data at *path*.

        Returns a tuple (dt_create, dt_update, dt_extract, dt_delete) with
        the elapsed wall-clock seconds of each measured operation.
        """
        archive = repo + '::borg-benchmark-crud'
        compression = '--compression=none'
        # measure create perf (without files cache to always have it chunking)
        t_start = time.monotonic()
        rc = self.do_create(self.parse_args(['create', compression, '--no-files-cache', archive + '1', path]))
        t_end = time.monotonic()
        dt_create = t_end - t_start
        assert rc == 0
        # now build files cache
        rc1 = self.do_create(self.parse_args(['create', compression, archive + '2', path]))
        rc2 = self.do_delete(self.parse_args(['delete', archive + '2']))
        assert rc1 == rc2 == 0
        # measure a no-change update (archive1 is still present)
        t_start = time.monotonic()
        rc1 = self.do_create(self.parse_args(['create', compression, archive + '3', path]))
        t_end = time.monotonic()
        dt_update = t_end - t_start
        # deleting archive 3 is cleanup only and intentionally not timed
        rc2 = self.do_delete(self.parse_args(['delete', archive + '3']))
        assert rc1 == rc2 == 0
        # measure extraction (dry-run: without writing result to disk)
        t_start = time.monotonic()
        rc = self.do_extract(self.parse_args(['extract', '--dry-run', archive + '1']))
        t_end = time.monotonic()
        dt_extract = t_end - t_start
        assert rc == 0
        # measure archive deletion (of LAST present archive with the data)
        t_start = time.monotonic()
        rc = self.do_delete(self.parse_args(['delete', archive + '1']))
        t_end = time.monotonic()
        dt_delete = t_end - t_start
        assert rc == 0
        return dt_create, dt_update, dt_extract, dt_delete

    @contextmanager
    def test_files(path, count, size, random):
        """Create *count* files of *size* bytes each and yield their directory.

        The files live in a 'borg-test-data' directory below *path* and are
        filled with random bytes if *random* is true, with zero bytes
        otherwise. The directory is removed again when the context is left,
        also if the benchmark failed or was interrupted.
        """
        path = os.path.join(path, 'borg-test-data')
        # Created outside of the try block on purpose: if this raises (e.g.
        # because the directory already exists), we must not remove a
        # directory that we did not create.
        os.makedirs(path)
        try:
            # the all-zero content is the same for every file, allocate it once
            zeros = None if random else b'\0' * size
            for i in range(count):
                fname = os.path.join(path, 'file_%d' % i)
                data = os.urandom(size) if random else zeros
                with SyncFile(fname, binary=True) as fd:  # used for posix_fadvise's sake
                    fd.write(data)
            yield path
        finally:
            shutil.rmtree(path)

    # (label, file count, file size in bytes, random content?)
    for msg, count, size, random in [
        ('Z-BIG', 10, 100000000, False),
        ('R-BIG', 10, 100000000, True),
        ('Z-MEDIUM', 1000, 1000000, False),
        ('R-MEDIUM', 1000, 1000000, True),
        ('Z-SMALL', 10000, 10000, False),
        ('R-SMALL', 10000, 10000, True),
    ]:
        with test_files(args.path, count, size, random) as path:
            dt_create, dt_update, dt_extract, dt_delete = measurement_run(args.location.canonical_path(), path)
        total_size_MB = count * size / 1e06
        file_size_formatted = format_file_size(size)
        content = 'random' if random else 'all-zero'
        fmt = '%s-%-10s %9.2f MB/s (%d * %s %s files: %.2fs)'
        print(fmt % ('C', msg, total_size_MB / dt_create, count, file_size_formatted, content, dt_create))
        print(fmt % ('R', msg, total_size_MB / dt_extract, count, file_size_formatted, content, dt_extract))
        print(fmt % ('U', msg, total_size_MB / dt_update, count, file_size_formatted, content, dt_update))
        print(fmt % ('D', msg, total_size_MB / dt_delete, count, file_size_formatted, content, dt_delete))
    # report success like the other do_* command handlers do
    return EXIT_SUCCESS
@with_repository(fake='dry_run', exclusive=True)
def do_create(self, args, repository, manifest=None, key=None):
"""Create new archive"""
@ -3141,6 +3209,69 @@ class Archiver:
subparser.add_argument('ids', metavar='IDs', nargs='+', type=str,
help='hex object ID(s) to show refcounts for')
benchmark_epilog = process_epilog("These commands do various benchmarks.")
subparser = subparsers.add_parser('benchmark', parents=[common_parser], add_help=False,
description='benchmark command',
epilog=benchmark_epilog,
formatter_class=argparse.RawDescriptionHelpFormatter,
help='benchmark command')
benchmark_parsers = subparser.add_subparsers(title='required arguments', metavar='<command>')
subparser.set_defaults(fallback_func=functools.partial(self.do_subcommand_help, subparser))
bench_crud_epilog = process_epilog("""
This command benchmarks borg CRUD (create, read, update, delete) operations.
It creates input data below the given PATH and backups this data into the given REPO.
The REPO must already exist (it could be a fresh empty repo or an existing repo, the
command will create / read / update / delete some archives named borg-test-data* there.
Make sure you have free space there, you'll need about 1GB each (+ overhead).
If your repository is encrypted and borg needs a passphrase to unlock the key, use:
BORG_PASSPHRASE=mysecret borg benchmark crud REPO PATH
Measurements are done with different input file sizes and counts.
The file contents are very artificial (either all zero or all random),
thus the measurement results do not necessarily reflect performance with real data.
Also, due to the kind of content used, no compression is used in these benchmarks.
C- == borg create (1st archive creation, no compression, do not use files cache)
C-Z- == all-zero files. full dedup, this is primarily measuring reader/chunker/hasher.
C-R- == random files. no dedup, measuring throughput through all processing stages.
R- == borg extract (extract archive, dry-run, do everything, but do not write files to disk)
R-Z- == all zero files. Measuring heavily duplicated files.
R-R- == random files. No duplication here, measuring throughput through all processing
stages, except writing to disk.
U- == borg create (2nd archive creation of unchanged input files, measure files cache speed)
The throughput value is kind of virtual here, it does not actually read the file.
U-Z- == needs to check the 2 all-zero chunks' existence in the repo.
U-R- == needs to check existence of a lot of different chunks in the repo.
D- == borg delete archive (delete last remaining archive, measure deletion + compaction)
D-Z- == few chunks to delete / few segments to compact/remove.
D-R- == many chunks to delete / many segments to compact/remove.
Please note that there might be quite some variance in these measurements.
Try multiple measurements and having a otherwise idle machine (and network, if you use it).
""")
subparser = benchmark_parsers.add_parser('crud', parents=[common_parser], add_help=False,
description=self.do_benchmark_crud.__doc__,
epilog=bench_crud_epilog,
formatter_class=argparse.RawDescriptionHelpFormatter,
help='benchmarks borg CRUD (create, extract, update, delete).')
subparser.set_defaults(func=self.do_benchmark_crud)
subparser.add_argument('location', metavar='REPO',
type=location_validator(archive=False),
help='repo to use for benchmark (must exist)')
subparser.add_argument('path', metavar='PATH', help='path were to create benchmark input data')
return parser
@staticmethod