Commit 9c27ee9f authored by Geoff Simmons

Add a benchmark utility for the PT functions.

parent 37747adf
AUTOMAKE_OPTIONS = subdir-objects

# Compile against the Varnish API headers and the headers in src/.
AM_CFLAGS = $(VARNISHAPI_CFLAGS) -I$(top_srcdir)/src -Wall -Werror -Wextra \
	-std=c99

bin_PROGRAMS = bench
bench_SOURCES = bench.c

# patricia.o is a build product, so it is located via the build tree
# (top_builddir), which differs from top_srcdir in out-of-tree builds.
# Libraries are listed in LDADD rather than in LDFLAGS so that they follow
# the objects that reference them on the link command line.
bench_LDADD = $(top_builddir)/src/.libs/patricia.o $(VARNISHAPI_LIBS)
/*-
* Copyright (c) 2020 UPLEX Nils Goroll Systemoptimierung
* All rights reserved
*
* Author: Geoffrey Simmons <geoffrey.simmons@uplex.de>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
 * Feature-test macros. They are defined ahead of every #include line in
 * this file.
 */
/* for strdup() and getline() */
#define _POSIX_C_SOURCE 200809L
/* for srand48() and drand48() */
#define _XOPEN_SOURCE
#include "config.h"
#include <errno.h>
#include <inttypes.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <sys/resource.h>
#include "vdef.h"
#include "vas.h"
#include "miniobj.h"
#include "vsb.h"
#include "patricia.h"
/* Nanoseconds per second; used to convert struct timespec values to ns. */
#define BILLION (1000000000)
/* Default iteration count for the benchmark loops (see the -n option). */
#define ITERATIONS (1000)
/* Clock id used for every timing measurement in this file. */
#define CLOCK (CLOCK_MONOTONIC)
/*
 * Trie handle shared by main() and the atexit handler; it is handed to the
 * PT_*() functions and starts out as NULL.
 */
static struct pt_y *origo = NULL;
/*
 * Return the time elapsed from *before to *after in nanoseconds.
 *
 * The caller is expected to pass after >= before; the result is not
 * meaningful otherwise.
 *
 * The seconds difference is widened to uint64_t before it is scaled, so
 * the multiplication cannot overflow even where time_t is a 32-bit type.
 */
static uint64_t
tdiff(const struct timespec *before, const struct timespec *after)
{
	/* typed 64-bit constant, so that all scaling is done in uint64_t */
	static const uint64_t nsec_per_sec = UINT64_C(1000000000);
	uint64_t secs = (uint64_t)(after->tv_sec - before->tv_sec);
	long nsec = after->tv_nsec - before->tv_nsec;

	if (nsec < 0) {
		/* borrow one second to make the nanosecond part positive */
		secs--;
		nsec += (long)nsec_per_sec;
	}
	return (secs * nsec_per_sec + (uint64_t)nsec);
}
/*
 * Return a pseudo-random integer derived from drand48(), scaled to the
 * width of the range [lo, hi) and offset by lo.
 */
static int
rnd(int lo, int hi)
{
	const int span = hi - lo;
	const double scaled = drand48() * span;

	return (lo + (int)scaled);
}
/*
 * Permute the n elements of index in place: each position i is swapped
 * with a position chosen by rnd() from [i, n).
 *
 * Arrays with fewer than two elements are left untouched; without that
 * guard, n - 1 would wrap around for n == 0, since n is unsigned.
 *
 * rnd() works on int, so n must not exceed INT_MAX.
 */
static inline void
shuffle(unsigned *index, size_t n)
{
	if (n < 2)
		return;

	for (size_t i = 0; i < n - 1; i++) {
		size_t j = (size_t)rnd((int)i, (int)n);
		unsigned temp = index[i];

		index[i] = index[j];
		index[j] = temp;
	}
}
/*
 * Exit handler, registered with atexit() in main() after the trie has been
 * built: hands the trie in origo to PT_Free().
 */
static void
free_on_exit(void)
{
PT_Free(origo);
}
/*
 * Write the command-line synopsis to stderr and terminate the process.
 *
 * argv   - program name shown in the synopsis
 * status - exit status passed to exit(); this function does not return
 */
void
usage(const char *argv, int status)
{
	FILE *out = stderr;

	(void)fprintf(out,
	    "Usage: %s [-hs] [-c csvfile] [-d dumpfile] [-i inputfile]\n"
	    " [-m m|p] [-n iterations] [file]\n",
	    argv);
	exit(status);
}
/*
 * Benchmark driver for the PT_*() trie functions.
 *
 * Reads a set of strings, one per line, from the file named by the optional
 * positional argument (stdin if absent or "-"), inserts them into a trie
 * with PT_Insert(), prints the results of PT_Stats(), and then times
 * PT_Lookup() and/or PT_Prefixes() over a set of input strings.
 *
 * Options:
 *   -c csvfile   write one CSV row per timed operation to csvfile
 *   -d dumpfile  write the output of PT_Dump() to dumpfile
 *   -h           print usage and exit successfully
 *   -i inputfile read the benchmark inputs from inputfile; if not given, or
 *                if the file has no lines, the string set is used as inputs
 *   -m m|p       run only the match (m) or only the prefix (p) benchmark
 *   -n N         number of iterations (default ITERATIONS); with 0 the
 *                program exits after printing the stats
 *   -s           shuffle the input order on each iteration
 *
 * Exits with EXIT_FAILURE on usage, I/O or insertion errors, otherwise with
 * EXIT_SUCCESS.
 */
int
main(int argc, char *argv[])
{
	FILE *stringsf = stdin, *csv = NULL;
	char **strings = NULL, **inputs = NULL, *line, *inputf = NULL,
	    *csvf = NULL, *dumpf = NULL, *end;
	size_t lineln = LINE_MAX;
	ssize_t readln;
	unsigned n = 0, ninputs = 0, *order;
	struct timespec before, after, start, finish;
	uint64_t ns = 0, iters, matches, exacts, misses;
	struct pt_stats stats = { .magic = PT_STATS_MAGIC };
	int opt, do_shuf = 0, do_iters = ITERATIONS, do_match = 1,
	    do_prefix = 1;
	long lval;
	struct rusage rusage;

	while ((opt = getopt(argc, argv, "hsc:d:i:m:n:")) != -1) {
		switch (opt) {
		case 'c':
			csvf = optarg;
			break;
		case 'd':
			dumpf = optarg;
			break;
		case 'h':
			usage(argv[0], EXIT_SUCCESS);
			break;
		case 'i':
			inputf = optarg;
			break;
		case 'm':
			if (strcmp(optarg, "m") == 0)
				do_prefix = 0;
			else if (strcmp(optarg, "p") == 0)
				do_match = 0;
			else
				usage(argv[0], EXIT_FAILURE);
			break;
		case 'n':
			/*
			 * Reject garbage and negative counts: a negative
			 * count would skip the benchmark loops and lead to
			 * a division by zero when the mean is computed.
			 */
			errno = 0;
			lval = strtol(optarg, &end, 10);
			if (errno != 0 || end == optarg || *end != '\0' ||
			    lval < 0 || lval > INT_MAX) {
				fprintf(stderr, "Invalid iteration count: %s\n",
				    optarg);
				usage(argv[0], EXIT_FAILURE);
			}
			do_iters = (int)lval;
			break;
		case 's':
			do_shuf = 1;
			break;
		default:
			usage(argv[0], EXIT_FAILURE);
		}
	}

	if (argc - optind > 1)
		usage(argv[0], EXIT_FAILURE);
	if (argc == optind + 1 && strcmp(argv[optind], "-") != 0) {
		errno = 0;
		if ((stringsf = fopen(argv[optind], "r")) == NULL) {
			fprintf(stderr, "Cannot open %s: %s\n", argv[optind],
			    strerror(errno));
			exit(EXIT_FAILURE);
		}
	}

	/* Read the string set, one string per line. */
	line = malloc(lineln);
	AN(line);
	errno = 0;
	while ((readln = getline(&line, &lineln, stringsf)) != -1) {
		/*
		 * Strip the newline only if there is one, so that a final
		 * line without a terminator keeps its last character.
		 */
		if (readln > 0 && line[readln - 1] == '\n')
			line[--readln] = '\0';
		if (readln > USHRT_MAX) {
			fprintf(stderr, "String length %zd too long (max %d)\n",
			    readln, USHRT_MAX);
			exit(EXIT_FAILURE);
		}
		n++;
		strings = realloc(strings, n * sizeof(char *));
		AN(strings);
		strings[n - 1] = strdup(line);
		AN(strings[n - 1]);
	}
	if (ferror(stringsf)) {
		fprintf(stderr, "Error reading %s: %s\n",
		    stringsf == stdin ? "stdin" : argv[optind],
		    strerror(errno));
		exit(EXIT_FAILURE);
	}
	errno = 0;
	if (stringsf != stdin && fclose(stringsf) != 0) {
		fprintf(stderr, "Error closing %s: %s\n", argv[optind],
		    strerror(errno));
		exit(EXIT_FAILURE);
	}
	printf("%u strings read\n", n);
	if (n == 0)
		exit(EXIT_SUCCESS);

	/* Optionally read a separate set of benchmark inputs. */
	if (inputf != NULL) {
		FILE *inf;

		errno = 0;
		if ((inf = fopen(inputf, "r")) == NULL) {
			fprintf(stderr, "Cannot open %s: %s\n", inputf,
			    strerror(errno));
			exit(EXIT_FAILURE);
		}
		while ((readln = getline(&line, &lineln, inf)) != -1) {
			if (readln > 0 && line[readln - 1] == '\n')
				line[--readln] = '\0';
			if (readln > USHRT_MAX) {
				fprintf(stderr, "%s: string length %zd "
				    "too long (max %d)\n", inputf,
				    readln, USHRT_MAX);
				exit(EXIT_FAILURE);
			}
			ninputs++;
			inputs = realloc(inputs, ninputs * sizeof(char *));
			AN(inputs);
			inputs[ninputs - 1] = strdup(line);
			AN(inputs[ninputs - 1]);
		}
		if (ferror(inf)) {
			fprintf(stderr, "Error reading %s: %s\n", inputf,
			    strerror(errno));
			exit(EXIT_FAILURE);
		}
		errno = 0;
		if (fclose(inf) != 0) {
			fprintf(stderr, "Error closing %s: %s\n", inputf,
			    strerror(errno));
			exit(EXIT_FAILURE);
		}
		printf("%u input strings read\n", ninputs);
	}
	if (inputf == NULL || ninputs == 0) {
		printf("Using string set as inputs\n");
		inputs = strings;
		ninputs = n;
	}
	free(line);

	if (csvf != NULL) {
		errno = 0;
		if ((csv = fopen(csvf, "w")) == NULL) {
			fprintf(stderr, "Cannot open %s: %s\n", csvf,
			    strerror(errno));
			exit(EXIT_FAILURE);
		}
		fprintf(csv, "type,matches,exact,t\n");
	}

	/* Seed the generator used for shuffling from the real-time clock. */
	AZ(clock_gettime(CLOCK_REALTIME, &before));
	srand48(before.tv_nsec);

	AZ(clock_getres(CLOCK, &before));
	printf("Clock resolution %ld ns\n",
	    (long)(before.tv_sec * BILLION + before.tv_nsec));

	printf("\nInitializing ...\n");
	AZ(PT_Inited());
	PT_Init();
	AN(PT_Inited());

	/* Build the trie, timing each insertion individually. */
	printf("Building trie ...\n");
	for (unsigned i = 0; i < n; i++) {
		int ret;

		errno = 0;
		(void)clock_gettime(CLOCK, &before);
		ret = PT_Insert(&origo, i, strings);
		(void)clock_gettime(CLOCK, &after);
		if (ret != 0) {
			fprintf(stderr, "PT_Insert() failed: %s\n",
			    strerror(errno));
			exit(EXIT_FAILURE);
		}
		ns += tdiff(&before, &after);
	}
	AZ(atexit(free_on_exit));
	printf("... complete.\n");
	printf("%u strings inserted in %.9f s, mean %" PRIu64 " ns/insert\n",
	    n, ns * 1e-9, ns / n);

	if (dumpf != NULL) {
		FILE *df;
		struct vsb *vsb;
		size_t len;

		printf("\nDumping trie to %s ...\n", dumpf);
		vsb = PT_Dump(origo, strings);
		CHECK_OBJ_NOTNULL(vsb, VSB_MAGIC);
		errno = 0;
		if ((df = fopen(dumpf, "w")) == NULL) {
			fprintf(stderr, "Cannot open %s: %s\n", dumpf,
			    strerror(errno));
			exit(EXIT_FAILURE);
		}
		len = (size_t)VSB_len(vsb);
		if (fwrite(VSB_data(vsb), 1, len, df) != len) {
			fprintf(stderr, "Error writing to %s: %s\n", dumpf,
			    strerror(errno));
			exit(EXIT_FAILURE);
		}
		VSB_destroy(&vsb);
		if (fclose(df) != 0) {
			fprintf(stderr, "Error closing %s: %s\n", dumpf,
			    strerror(errno));
			exit(EXIT_FAILURE);
		}
		printf("... done.\n");
	}

	printf("\nGetting stats ...\n");
	(void)clock_gettime(CLOCK, &before);
	PT_Stats(origo, strings, &stats);
	(void)clock_gettime(CLOCK, &after);
	printf("Stats computed in %" PRIu64 " ns\n", tdiff(&before, &after));
	printf("%lu nodes\n", stats.nodes);
	printf("%lu leaves\n", stats.leaves);
	printf("%lu terminal nodes\n", stats.terms);
	printf("%lu bytes node size\n", stats.nodesz);
	printf("%lu min terminating node depth\n", stats.dmin);
	printf("%lu max terminating node depth\n", stats.dmax);
	printf("%.1f mean terminating node depth\n", stats.davg);

	if (do_iters == 0)
		exit(EXIT_SUCCESS);

	/*
	 * Order in which the inputs are visited. Allocated on the heap
	 * rather than as a variable-length array, so that a large input set
	 * cannot exhaust the stack.
	 */
	order = malloc(ninputs * sizeof(*order));
	AN(order);
	for (unsigned i = 0; i < ninputs; i++)
		order[i] = i;

	printf("\nBenchmarking with %d iterations of %u input strings\n",
	    do_iters, ninputs);
	if (do_shuf)
		printf("Shuffling inputs on each iteration\n");

	/*
	 * The wall clock interval spans whichever benchmarks are run; both
	 * ends are always taken, also when -m disables one of them.
	 */
	(void)clock_gettime(CLOCK, &start);

	if (do_match) {
		printf("\nBenchmarking matches ...\n");
		ns = 0;
		iters = 0;
		matches = 0;
		for (int iter = 0; iter < do_iters; iter++) {
			if (do_shuf)
				shuffle(order, ninputs);
			for (unsigned i = 0; i < ninputs; i++) {
				unsigned idx, stridx = order[i];
				uint64_t diff;

				(void)clock_gettime(CLOCK, &before);
				idx = PT_Lookup(origo, strings,
				    inputs[stridx]);
				(void)clock_gettime(CLOCK, &after);

				/* UINT_MAX is treated as "no match" */
				if (idx != UINT_MAX) {
					AZ(strcmp(strings[idx],
					    inputs[stridx]));
					matches++;
				}
				diff = tdiff(&before, &after);
				ns += diff;
				iters++;
				if (csv != NULL) {
					if (idx != UINT_MAX)
						fprintf(csv, "match,1,1,%"
						    PRIu64 "\n", diff);
					else
						fprintf(csv, "match,0,0,%"
						    PRIu64 "\n", diff);
				}
			}
		}
		assert(matches <= iters);
		printf("... complete.\n");
		printf("%" PRIu64 " match operations in %.09f s, mean %"
		    PRIu64 " ns/op\n", iters, ns * 1e-9, ns / iters);
		printf("%" PRIu64 " matches, %" PRIu64 " misses\n", matches,
		    iters - matches);
	}

	if (do_prefix) {
		unsigned *indices;

		printf("\nBenchmarking prefix matches ...\n");
		indices = malloc(ninputs * sizeof(*indices));
		AN(indices);
		struct match_data match = {
			.magic = MATCH_DATA_MAGIC,
			.indices = indices,
			.limit = ninputs,
		};
		ns = 0;
		iters = 0;
		matches = 0;
		misses = 0;
		exacts = 0;
		for (int iter = 0; iter < do_iters; iter++) {
			if (do_shuf)
				shuffle(order, ninputs);
			for (unsigned i = 0; i < ninputs; i++) {
				unsigned stridx = order[i];
				int ret;
				uint64_t diff;

				(void)clock_gettime(CLOCK, &before);
				ret = PT_Prefixes(origo, strings,
				    inputs[stridx], &match);
				(void)clock_gettime(CLOCK, &after);
				AZ(ret);

				if (match.n > 0) {
					assert(match.min <= match.max);
					matches += match.n;
				}
				else
					misses++;
				/* sanity-check every reported prefix */
				for (unsigned j = 0; j < match.n; j++) {
					assert(match.indices[j] >= match.min);
					assert(match.indices[j] <= match.max);
					AN(strstr(inputs[stridx],
					    strings[match.indices[j]]));
				}
				if (match.exact != UINT_MAX) {
					AZ(strcmp(inputs[stridx],
					    strings[match.exact]));
					exacts++;
				}
				diff = tdiff(&before, &after);
				ns += diff;
				iters++;
				if (csv != NULL) {
					if (match.n > 0)
						fprintf(csv, "prefix,%u,%d,%"
						    PRIu64 "\n", match.n,
						    match.exact != UINT_MAX,
						    diff);
					else
						fprintf(csv, "prefix,0,0,%"
						    PRIu64 "\n", diff);
				}
			}
		}
		assert(matches >= exacts);
		printf("... complete.\n");
		printf("%" PRIu64 " prefix match operations in %.09f s, mean %"
		    PRIu64 " ns/op\n", iters, ns * 1e-9, ns / iters);
		printf("%" PRIu64 " prefixes found, %" PRIu64
		    " exact matches, %" PRIu64 " misses\n", matches, exacts,
		    misses);
		free(indices);
	}

	(void)clock_gettime(CLOCK, &finish);

	if (csv != NULL && fclose(csv) != 0) {
		fprintf(stderr, "Error closing %s: %s\n", csvf,
		    strerror(errno));
		exit(EXIT_FAILURE);
	}
	free(order);

	ns = tdiff(&start, &finish);
	printf("\nBenchmark wall clock time %.09f s\n", ns * 1e-9);
	AZ(getrusage(RUSAGE_SELF, &rusage));
	printf("user %.06f s, sys %.06f s, vcsw %ld, ivcsw %ld\n",
	    rusage.ru_utime.tv_sec + 1e-6 * rusage.ru_utime.tv_usec,
	    rusage.ru_stime.tv_sec + 1e-6 * rusage.ru_stime.tv_usec,
	    rusage.ru_nvcsw, rusage.ru_nivcsw);
	exit(EXIT_SUCCESS);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment