From 9f3a970cec49bd3bd7f70d2921a99bfe3b1c3725 Mon Sep 17 00:00:00 2001
From: Thomas Waldmann
Date: Sun, 26 Feb 2017 07:17:23 +0100
Subject: [PATCH 1/2] borg benchmark crud command, fixes #1788

---
 src/borg/archiver.py | 133 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 132 insertions(+), 1 deletion(-)

diff --git a/src/borg/archiver.py b/src/borg/archiver.py
index 243e91d2d..c5dace289 100644
--- a/src/borg/archiver.py
+++ b/src/borg/archiver.py
@@ -9,6 +9,7 @@ import logging
 import os
 import re
 import shlex
+import shutil
 import signal
 import stat
 import subprocess
@@ -17,6 +18,7 @@ import textwrap
 import time
 import traceback
 from binascii import unhexlify
+from contextlib import contextmanager
 from datetime import datetime, timedelta
 from itertools import zip_longest
 
@@ -57,7 +59,7 @@ from .helpers import basic_json_data, json_print
 from .item import Item
 from .key import key_creator, tam_required_file, tam_required, RepoKey, PassphraseKey
 from .keymanager import KeyManager
-from .platform import get_flags, umount, get_process_id
+from .platform import get_flags, umount, get_process_id, SyncFile
 from .remote import RepositoryServer, RemoteRepository, cache_if_remote
 from .repository import Repository, LIST_SCAN_LIMIT
 from .selftest import selftest
@@ -322,6 +324,72 @@ class Archiver:
         logger.info('Key updated')
         return EXIT_SUCCESS
 
+    def do_benchmark_crud(self, args):
+        def measurement_run(repo, path):
+            archive = repo + '::borg-benchmark-crud'
+            compression = '--compression=none'
+            # measure create perf (without files cache to always have it chunking)
+            t_start = time.monotonic()
+            rc = self.do_create(self.parse_args(['create', compression, '--no-files-cache', archive + '1', path]))
+            t_end = time.monotonic()
+            dt_create = t_end - t_start
+            assert rc == 0
+            # now build files cache
+            rc1 = self.do_create(self.parse_args(['create', compression, archive + '2', path]))
+            rc2 = self.do_delete(self.parse_args(['delete', archive + '2']))
+            assert rc1 == rc2 == 0
+            # measure a no-change update (archive1 is still present)
+            t_start = time.monotonic()
+            rc1 = self.do_create(self.parse_args(['create', compression, archive + '3', path]))
+            t_end = time.monotonic()
+            dt_update = t_end - t_start
+            rc2 = self.do_delete(self.parse_args(['delete', archive + '3']))
+            assert rc1 == rc2 == 0
+            # measure extraction (dry-run: without writing result to disk)
+            t_start = time.monotonic()
+            rc = self.do_extract(self.parse_args(['extract', '--dry-run', archive + '1']))
+            t_end = time.monotonic()
+            dt_extract = t_end - t_start
+            assert rc == 0
+            # measure archive deletion (of LAST present archive with the data)
+            t_start = time.monotonic()
+            rc = self.do_delete(self.parse_args(['delete', archive + '1']))
+            t_end = time.monotonic()
+            dt_delete = t_end - t_start
+            assert rc == 0
+            return dt_create, dt_update, dt_extract, dt_delete
+
+        @contextmanager
+        def test_files(path, count, size, random):
+            path = os.path.join(path, 'borg-test-data')
+            os.makedirs(path)
+            for i in range(count):
+                fname = os.path.join(path, 'file_%d' % i)
+                data = b'\0' * size if not random else os.urandom(size)
+                with SyncFile(fname, binary=True) as fd:  # used for posix_fadvise's sake
+                    fd.write(data)
+            yield path
+            shutil.rmtree(path)
+
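+        # one test set per tuple below: (label, file count, file size in bytes, random content?)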
+        for msg, count, size, random in [
+            ('Z-BIG', 10, 100000000, False),
+            ('R-BIG', 10, 100000000, True),
+            ('Z-MEDIUM', 1000, 1000000, False),
+            ('R-MEDIUM', 1000, 1000000, True),
+            ('Z-SMALL', 10000, 10000, False),
+            ('R-SMALL', 10000, 10000, True),
+        ]:
+            with test_files(args.path, count, size, random) as path:
+                dt_create, dt_update, dt_extract, dt_delete = measurement_run(args.location.canonical_path(), path)
+            total_size_MB = count * size / 1e06
+            file_size_formatted = format_file_size(size)
+            content = 'random' if random else 'all-zero'
+            fmt = '%s-%-10s %9.2f MB/s (%d * %s %s files: %.2fs)'
+            print(fmt % ('C', msg, total_size_MB / dt_create, count, file_size_formatted, content, dt_create))
+            print(fmt % ('R', msg, total_size_MB / dt_extract, count, file_size_formatted, content, dt_extract))
+            print(fmt % ('U', msg, total_size_MB / dt_update, count, file_size_formatted, content, dt_update))
+            print(fmt % ('D', msg, total_size_MB / dt_delete, count, file_size_formatted, content, dt_delete))
+
     @with_repository(fake='dry_run', exclusive=True)
     def do_create(self, args, repository, manifest=None, key=None):
         """Create new archive"""
@@ -3141,6 +3209,69 @@ class Archiver:
         subparser.add_argument('ids', metavar='IDs', nargs='+', type=str,
                                help='hex object ID(s) to show refcounts for')
 
+        benchmark_epilog = process_epilog("These commands do various benchmarks.")
+
+        subparser = subparsers.add_parser('benchmark', parents=[common_parser], add_help=False,
+                                          description='benchmark command',
+                                          epilog=benchmark_epilog,
+                                          formatter_class=argparse.RawDescriptionHelpFormatter,
+                                          help='benchmark command')
+
+        benchmark_parsers = subparser.add_subparsers(title='required arguments', metavar='')
+        subparser.set_defaults(fallback_func=functools.partial(self.do_subcommand_help, subparser))
+
+        bench_crud_epilog = process_epilog("""
+        This command benchmarks borg CRUD (create, read, update, delete) operations.
+
+        It creates input data below the given PATH and backs up this data into the given REPO.
+        The REPO must already exist (it could be a fresh empty repo or an existing repo);
+        the command will create / read / update / delete some archives named
+        borg-benchmark-crud* there.
+
+        Make sure you have free space there, you'll need about 1GB each (+ overhead).
+
+        If your repository is encrypted and borg needs a passphrase to unlock the key, use:
+
+        BORG_PASSPHRASE=mysecret borg benchmark crud REPO PATH
+
+        Measurements are done with different input file sizes and counts.
+        The file contents are very artificial (either all zero or all random),
+        thus the measurement results do not necessarily reflect performance with real data.
+        Also, due to the kind of content used, no compression is used in these benchmarks.
+
+        C-   == borg create (1st archive creation, no compression, do not use files cache)
+        C-Z- == all-zero files. Full dedup, this is primarily measuring reader/chunker/hasher.
+        C-R- == random files. No dedup, measuring throughput through all processing stages.
+
+        R-   == borg extract (extract archive, dry-run, do everything, but do not write files to disk)
+        R-Z- == all-zero files. Measuring heavily duplicated files.
+        R-R- == random files. No duplication here, measuring throughput through all processing
+                stages, except writing to disk.
+
+        U-   == borg create (2nd archive creation of unchanged input files, measure files cache speed)
+                The throughput value is kind of virtual here, since the files cache avoids
+                re-reading the unchanged files.
+        U-Z- == needs to check the 2 all-zero chunks' existence in the repo.
+        U-R- == needs to check existence of a lot of different chunks in the repo.
+
+        D-   == borg delete archive (delete last remaining archive, measure deletion + compaction)
+        D-Z- == few chunks to delete / few segments to compact/remove.
+        D-R- == many chunks to delete / many segments to compact/remove.
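+
+        For example, an output line from a sample run looks like this:
+
+        C-Z-BIG         116.06 MB/s (10 * 100.00 MB all-zero files: 8.62s)
+
+        i.e. operation / content / file size class, the throughput achieved, and in
+        parentheses the file count, file size, content kind and wall clock time spent.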
+
+        Please note that there might be considerable variance in these measurements.
+        Take multiple measurements on an otherwise idle machine (and network, if you use one).
+        """)
+        subparser = benchmark_parsers.add_parser('crud', parents=[common_parser], add_help=False,
+                                                 description=self.do_benchmark_crud.__doc__,
+                                                 epilog=bench_crud_epilog,
+                                                 formatter_class=argparse.RawDescriptionHelpFormatter,
+                                                 help='benchmarks borg CRUD (create, extract, update, delete).')
+        subparser.set_defaults(func=self.do_benchmark_crud)
+
+        subparser.add_argument('location', metavar='REPO',
+                               type=location_validator(archive=False),
+                               help='repo to use for benchmark (must exist)')
+
+        subparser.add_argument('path', metavar='PATH', help='path where to create benchmark input data')
+
         return parser
 
     @staticmethod

From 7e9845fc68a202a9cdf84e04f2f983c2b4814560 Mon Sep 17 00:00:00 2001
From: Thomas Waldmann
Date: Tue, 28 Feb 2017 21:44:06 +0100
Subject: [PATCH 2/2] added borg benchmark crud output to docs/misc/

---
 docs/misc/benchmark-crud.txt | 64 ++++++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)
 create mode 100644 docs/misc/benchmark-crud.txt

diff --git a/docs/misc/benchmark-crud.txt b/docs/misc/benchmark-crud.txt
new file mode 100644
index 000000000..f4ca363b9
--- /dev/null
+++ b/docs/misc/benchmark-crud.txt
@@ -0,0 +1,64 @@
+borg benchmark crud
+===================
+
+Here is an example of borg benchmark crud output.
+
+I ran it on my laptop (Core i5-4200u, 8 GB RAM, SATA SSD, Linux, ext4 fs).
+"src" as well as the repo are local, on this SSD.
+
+$ BORG_PASSPHRASE=secret borg init --encryption repokey-blake2 repo
+$ BORG_PASSPHRASE=secret borg benchmark crud repo src
+
+C-Z-BIG         116.06 MB/s (10 * 100.00 MB all-zero files: 8.62s)
+R-Z-BIG         197.00 MB/s (10 * 100.00 MB all-zero files: 5.08s)
+U-Z-BIG         418.07 MB/s (10 * 100.00 MB all-zero files: 2.39s)
+D-Z-BIG         724.94 MB/s (10 * 100.00 MB all-zero files: 1.38s)
+C-R-BIG          42.21 MB/s (10 * 100.00 MB random files: 23.69s)
+R-R-BIG         134.45 MB/s (10 * 100.00 MB random files: 7.44s)
+U-R-BIG         316.83 MB/s (10 * 100.00 MB random files: 3.16s)
+D-R-BIG         251.10 MB/s (10 * 100.00 MB random files: 3.98s)
+C-Z-MEDIUM      118.53 MB/s (1000 * 1.00 MB all-zero files: 8.44s)
+R-Z-MEDIUM      218.49 MB/s (1000 * 1.00 MB all-zero files: 4.58s)
+U-Z-MEDIUM      591.59 MB/s (1000 * 1.00 MB all-zero files: 1.69s)
+D-Z-MEDIUM      730.04 MB/s (1000 * 1.00 MB all-zero files: 1.37s)
+C-R-MEDIUM       31.46 MB/s (1000 * 1.00 MB random files: 31.79s)
+R-R-MEDIUM      129.64 MB/s (1000 * 1.00 MB random files: 7.71s)
+U-R-MEDIUM      621.86 MB/s (1000 * 1.00 MB random files: 1.61s)
+D-R-MEDIUM      234.82 MB/s (1000 * 1.00 MB random files: 4.26s)
+C-Z-SMALL        19.81 MB/s (10000 * 10.00 kB all-zero files: 5.05s)
+R-Z-SMALL        97.69 MB/s (10000 * 10.00 kB all-zero files: 1.02s)
+U-Z-SMALL        36.35 MB/s (10000 * 10.00 kB all-zero files: 2.75s)
+D-Z-SMALL        57.04 MB/s (10000 * 10.00 kB all-zero files: 1.75s)
+C-R-SMALL         9.81 MB/s (10000 * 10.00 kB random files: 10.19s)
+R-R-SMALL        92.21 MB/s (10000 * 10.00 kB random files: 1.08s)
+U-R-SMALL        64.62 MB/s (10000 * 10.00 kB random files: 1.55s)
+D-R-SMALL        51.62 MB/s (10000 * 10.00 kB random files: 1.94s)
+
+
+A second run some time later gave:
+
+C-Z-BIG         115.22 MB/s (10 * 100.00 MB all-zero files: 8.68s)
+R-Z-BIG         196.06 MB/s (10 * 100.00 MB all-zero files: 5.10s)
+U-Z-BIG         439.50 MB/s (10 * 100.00 MB all-zero files: 2.28s)
+D-Z-BIG         671.11 MB/s (10 * 100.00 MB all-zero files: 1.49s)
+C-R-BIG          43.40 MB/s (10 * 100.00 MB random files: 23.04s)
+R-R-BIG         133.17 MB/s (10 * 100.00 MB random files: 7.51s)
+U-R-BIG         464.50 MB/s (10 * 100.00 MB random files: 2.15s)
+D-R-BIG         245.19 MB/s (10 * 100.00 MB random files: 4.08s)
+C-Z-MEDIUM      110.82 MB/s (1000 * 1.00 MB all-zero files: 9.02s)
+R-Z-MEDIUM      217.96 MB/s (1000 * 1.00 MB all-zero files: 4.59s)
+U-Z-MEDIUM      601.54 MB/s (1000 * 1.00 MB all-zero files: 1.66s)
+D-Z-MEDIUM      686.99 MB/s (1000 * 1.00 MB all-zero files: 1.46s)
+C-R-MEDIUM       39.91 MB/s (1000 * 1.00 MB random files: 25.06s)
+R-R-MEDIUM      128.91 MB/s (1000 * 1.00 MB random files: 7.76s)
+U-R-MEDIUM      599.00 MB/s (1000 * 1.00 MB random files: 1.67s)
+D-R-MEDIUM      230.69 MB/s (1000 * 1.00 MB random files: 4.33s)
+C-Z-SMALL        14.78 MB/s (10000 * 10.00 kB all-zero files: 6.76s)
+R-Z-SMALL        96.86 MB/s (10000 * 10.00 kB all-zero files: 1.03s)
+U-Z-SMALL        35.22 MB/s (10000 * 10.00 kB all-zero files: 2.84s)
+D-Z-SMALL        64.93 MB/s (10000 * 10.00 kB all-zero files: 1.54s)
+C-R-SMALL        11.08 MB/s (10000 * 10.00 kB random files: 9.02s)
+R-R-SMALL        92.34 MB/s (10000 * 10.00 kB random files: 1.08s)
+U-R-SMALL        64.49 MB/s (10000 * 10.00 kB random files: 1.55s)
+D-R-SMALL        46.96 MB/s (10000 * 10.00 kB random files: 2.13s)
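+
+For reference, each throughput figure above is simply the total input volume
+divided by the measured wall clock time (the seconds shown in parentheses are
+rounded for display); a minimal sketch of the computation, with values taken
+from the C-Z-BIG line of the first run:
+
+    count, size = 10, 100000000         # 10 files of 100 MB each
+    total_size_MB = count * size / 1e6  # 1000.00 MB of input data
+    dt_create = 8.62                    # measured wall clock time in seconds
+    print('%.2f MB/s' % (total_size_MB / dt_create))  # ~116 MB/s, as above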