Commit 5454faa6 authored by Geoff Simmons

Add a benchmark tool for the perfect hash functions.

parent 666548fb
......@@ -46,3 +46,4 @@ Makefile.in
/src/tests/bench/bench
/src/tests/bench/bench_qp
/src/tests/bench/bench_ph
......@@ -4,7 +4,7 @@ AM_CFLAGS = $(VARNISHAPI_CFLAGS) -I$(top_srcdir)/src -Wall -Werror -Wextra \
-std=c99
AM_LDFLAGS = $(VARNISHAPI_LIBS)
bin_PROGRAMS = bench bench_qp
bin_PROGRAMS = bench bench_qp bench_ph
bench_SOURCES = bench.c
......@@ -13,3 +13,7 @@ bench_LDADD = $(top_srcdir)/src/.libs/patricia.o
bench_qp_SOURCES = bench_qp.c
bench_qp_LDADD = $(top_srcdir)/src/.libs/qp.o
bench_ph_SOURCES = bench_ph.c
bench_ph_LDADD = $(top_srcdir)/src/.libs/ph.o
/*-
* Copyright (c) 2020 UPLEX Nils Goroll Systemoptimierung
* All rights reserved
*
* Author: Geoffrey Simmons <geoffrey.simmons@uplex.de>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/* for strdup() and getline() */
#define _POSIX_C_SOURCE 200809L
/* for srand48() and drand48() */
#define _XOPEN_SOURCE
#include "config.h"

#include <errno.h>
#include <inttypes.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <sys/resource.h>

#include "vdef.h"
#include "vas.h"
#include "miniobj.h"
#include "vsb.h"
#include "ph.h"
/* Nanoseconds per second; used to convert struct timespec values to ns. */
#define BILLION (1000 * 1000 * 1000)
/* Default number of benchmark iterations; overridden by the -n option. */
#define ITERATIONS (1000)
/* Clock id passed to clock_gettime()/clock_getres() for all timings. */
#define CLOCK (CLOCK_MONOTONIC)
/*
 * The perfect hash under test. Assigned from PH_Generate() in main() and
 * released by free_on_exit().
 */
static struct ph *ph = NULL;
/*
 * Return the time elapsed from *before to *after in nanoseconds.
 *
 * The seconds and nanoseconds differences are widened to 64 bits before
 * being scaled by BILLION, so the multiplication does not overflow on
 * platforms where time_t or long is only 32 bits wide.
 *
 * The caller is expected to pass after >= before; an earlier "after"
 * yields a value wrapped modulo 2^64.
 */
static uint64_t
tdiff(struct timespec *before, struct timespec *after)
{
	int64_t sec = (int64_t)after->tv_sec - (int64_t)before->tv_sec;
	int64_t nsec = (int64_t)after->tv_nsec - (int64_t)before->tv_nsec;

	/* Borrow one second if the nanosecond field went backwards. */
	if (nsec < 0) {
		sec--;
		nsec += BILLION;
	}
	return ((uint64_t)sec * BILLION + (uint64_t)nsec);
}
/*
 * Return a pseudo-random integer drawn from the half-open range [lo, hi),
 * using drand48() as the source. The value of drand48() is scaled by the
 * width of the range, truncated to int and offset by lo.
 */
static int
rnd(int lo, int hi)
{
	const int span = hi - lo;
	const int offset = (int)(drand48() * span);

	return (lo + offset);
}
/*
 * Permute the n elements of index in place: each position i is swapped
 * with a position chosen by rnd() from [i, n).
 *
 * Arrays with fewer than two elements are left untouched. The early
 * return also prevents n - 1 from wrapping around when n is 0, since n
 * is unsigned.
 *
 * NOTE(review): rnd() works on int, so n is assumed to fit in an int.
 */
static inline void
shuffle(unsigned *index, size_t n)
{
	if (n < 2)
		return;

	for (size_t i = 0; i < n - 1; i++) {
		size_t j = (size_t)rnd((int)i, (int)n);
		/* Keep the element type unsigned while swapping. */
		unsigned tmp = index[i];

		index[i] = index[j];
		index[j] = tmp;
	}
}
/*
 * Exit handler: passes the file-scope perfect hash ph to PH_Free().
 * main() registers it with atexit() only after PH_Generate() has
 * returned a non-NULL hash.
 */
static void
free_on_exit(void)
{
PH_Free(ph);
}
/*
 * Print the command-line synopsis to stderr and terminate the process.
 *
 * argv is the program name to show in the synopsis; status is the exit
 * status passed to exit(). This function does not return.
 */
void
usage(const char *argv, int status)
{
	static const char synopsis[] =
	    "[-hs] [-c csvfile] [-i inputfile] [-n iterations] [file]";

	fprintf(stderr, "Usage: %s %s\n", argv, synopsis);
	exit(status);
}
/*
 * Benchmark driver for the perfect hash implementation.
 *
 * Reads the string set from the file operand (stdin if the operand is
 * absent or "-"), builds a perfect hash with PH_Generate() and reports the
 * build time, then times PH_Lookup() for every input string over a number
 * of iterations and reports totals, wall clock time and resource usage.
 *
 * Options:
 *   -c csvfile     write one CSV row per lookup to csvfile
 *   -h             print the usage message and exit successfully
 *   -i inputfile   read the lookup inputs from inputfile; if it is not
 *                  given or yields no lines, the string set is looked up
 *   -n iterations  number of passes over the inputs (default ITERATIONS);
 *                  must be a non-negative integer, 0 stops after the build
 *   -s             shuffle the order of the inputs before each pass
 *
 * Exits with EXIT_FAILURE on usage errors, I/O errors or if the hash
 * cannot be generated.
 */
int
main(int argc, char *argv[])
{
	FILE *stringsf = stdin, *csv = NULL;
	char **strings = NULL, **inputs = NULL, *line, *inputf = NULL,
	    *csvf = NULL, *end;
	size_t lineln = LINE_MAX;
	ssize_t readln;
	unsigned n = 0, ninputs = 0, *index;
	struct timespec before, after, start, finish;
	uint64_t ns = 0, iters, matches;
	int opt, do_shuf = 0, do_iters = ITERATIONS;
	long val;
	struct rusage rusage;

	while ((opt = getopt(argc, argv, "hsc:i:n:")) != -1) {
		switch (opt) {
		case 'c':
			csvf = optarg;
			break;
		case 'h':
			usage(argv[0], EXIT_SUCCESS);
			break;
		case 'i':
			inputf = optarg;
			break;
		case 'n':
			/*
			 * Reject garbage and negative counts: a negative
			 * count would skip the benchmark loops and lead to
			 * a division by zero when the mean is computed.
			 */
			errno = 0;
			val = strtol(optarg, &end, 10);
			if (errno != 0 || end == optarg || *end != '\0' ||
			    val < 0 || val > INT_MAX) {
				fprintf(stderr, "Invalid iteration count: %s\n",
				    optarg);
				usage(argv[0], EXIT_FAILURE);
			}
			do_iters = (int)val;
			break;
		case 's':
			do_shuf = 1;
			break;
		default:
			usage(argv[0], EXIT_FAILURE);
		}
	}

	if (argc - optind > 1)
		usage(argv[0], EXIT_FAILURE);

	if (argc == optind + 1 && strcmp(argv[optind], "-") != 0) {
		errno = 0;
		if ((stringsf = fopen(argv[optind], "r")) == NULL) {
			fprintf(stderr, "Cannot open %s: %s\n", argv[optind],
			    strerror(errno));
			exit(EXIT_FAILURE);
		}
	}

	/* Read the string set, one string per line. */
	line = malloc(lineln);
	AN(line);
	errno = 0;
	while ((readln = getline(&line, &lineln, stringsf)) != -1) {
		/*
		 * Strip the newline only if there is one; the last line of
		 * the file may end without it.
		 */
		if (readln > 0 && line[readln - 1] == '\n')
			line[--readln] = '\0';
		if (readln > USHRT_MAX) {
			fprintf(stderr, "String length %zd too long (max %d)\n",
			    readln, USHRT_MAX);
			exit(EXIT_FAILURE);
		}
		n++;
		strings = realloc(strings, n * sizeof(char *));
		AN(strings);
		strings[n - 1] = strdup(line);
		AN(strings[n - 1]);
	}
	if (ferror(stringsf)) {
		fprintf(stderr, "Error reading %s: %s\n",
		    stringsf == stdin ? "stdin" : argv[optind],
		    strerror(errno));
		exit(EXIT_FAILURE);
	}
	errno = 0;
	if (stringsf != stdin && fclose(stringsf) != 0) {
		fprintf(stderr, "Error closing %s: %s\n", argv[optind],
		    strerror(errno));
		exit(EXIT_FAILURE);
	}
	printf("%u strings read\n", n);
	if (n == 0)
		exit(EXIT_SUCCESS);

	/* Optionally read a separate set of lookup inputs. */
	if (inputf != NULL) {
		FILE *inf;

		errno = 0;
		if ((inf = fopen(inputf, "r")) == NULL) {
			fprintf(stderr, "Cannot open %s: %s\n", inputf,
			    strerror(errno));
			exit(EXIT_FAILURE);
		}
		while ((readln = getline(&line, &lineln, inf)) != -1) {
			if (readln > 0 && line[readln - 1] == '\n')
				line[--readln] = '\0';
			if (readln > USHRT_MAX) {
				fprintf(stderr, "%s: string length %zd "
				    "too long (max %d)\n", inputf,
				    readln, USHRT_MAX);
				exit(EXIT_FAILURE);
			}
			ninputs++;
			inputs = realloc(inputs, ninputs * sizeof(char *));
			AN(inputs);
			inputs[ninputs - 1] = strdup(line);
			AN(inputs[ninputs - 1]);
		}
		if (ferror(inf)) {
			fprintf(stderr, "Error reading %s: %s\n", inputf,
			    strerror(errno));
			exit(EXIT_FAILURE);
		}
		errno = 0;
		if (fclose(inf) != 0) {
			fprintf(stderr, "Error closing %s: %s\n", inputf,
			    strerror(errno));
			exit(EXIT_FAILURE);
		}
		printf("%u input strings read\n", ninputs);
	}
	if (inputf == NULL || ninputs == 0) {
		printf("Using string set as inputs\n");
		inputs = strings;
		ninputs = n;
	}
	free(line);

	if (csvf != NULL) {
		errno = 0;
		if ((csv = fopen(csvf, "w")) == NULL) {
			fprintf(stderr, "Cannot open %s: %s\n", csvf,
			    strerror(errno));
			exit(EXIT_FAILURE);
		}
		fprintf(csv, "type,matches,exact,t\n");
	}

	/* Seed the generator used for shuffling from the real-time clock. */
	AZ(clock_gettime(CLOCK_REALTIME, &before));
	srand48(before.tv_nsec);

	AZ(clock_getres(CLOCK, &before));
	printf("Clock resolution %" PRId64 " ns\n",
	    (int64_t)before.tv_sec * BILLION + before.tv_nsec);

	printf("\nBuilding perfect hash ...\n");
	errno = 0;
	(void)clock_gettime(CLOCK, &before);
	ph = PH_Generate(strings, n);
	(void)clock_gettime(CLOCK, &after);
	ns += tdiff(&before, &after);
	if (ph == NULL) {
		fprintf(stderr, "PH_Generate() failed: %s\n", strerror(errno));
		if (errno == ERANGE)
			for (unsigned i = 0; i < n; i++)
				fprintf(stderr, "%s\n", strings[i]);
		printf("Failure for %u strings after %.9f s\n", n, ns * 1e-9);
		exit(EXIT_FAILURE);
	}
	AZ(atexit(free_on_exit));
	printf("... complete.\n");
	printf("Generated for %u strings in %.9f s, mean %" PRIu64
	    " ns/string\n", n, ns * 1e-9, ns / n);

	if (do_iters == 0)
		exit(EXIT_SUCCESS);

	/*
	 * Lookup order, allocated on the heap so that a large input set
	 * cannot overflow the stack. ninputs is at least 1 at this point.
	 */
	index = calloc(ninputs, sizeof(*index));
	AN(index);
	for (unsigned i = 0; i < ninputs; i++)
		index[i] = i;

	printf("\nBenchmarking with %d iterations of %u input strings\n",
	    do_iters, ninputs);
	if (do_shuf)
		printf("Shuffling inputs on each iteration\n");

	(void)clock_gettime(CLOCK, &start);
	printf("\nBenchmarking matches ...\n");
	ns = 0;
	iters = 0;
	matches = 0;
	for (int iter = 0; iter < do_iters; iter++) {
		if (do_shuf)
			shuffle(index, ninputs);
		for (unsigned i = 0; i < ninputs; i++) {
			unsigned idx, stridx = index[i];
			uint64_t diff;
			int hit;

			/* Only the lookup itself is inside the timed span. */
			(void)clock_gettime(CLOCK, &before);
			idx = PH_Lookup(ph, strings, inputs[stridx]);
			(void)clock_gettime(CLOCK, &after);

			/* UINT_MAX is treated as "no match". */
			hit = (idx != UINT_MAX);
			if (hit) {
				AZ(strcmp(strings[idx], inputs[stridx]));
				matches++;
			}
			diff = tdiff(&before, &after);
			ns += diff;
			iters++;
			if (csv != NULL)
				fprintf(csv, "match,%d,%d,%" PRIu64 "\n",
				    hit, hit, diff);
		}
	}
	assert(matches <= iters);
	free(index);

	printf("... complete.\n");
	printf("%" PRIu64 " match operations in %.09f s, mean %" PRIu64
	    " ns/op\n", iters, ns * 1e-9, ns / iters);
	printf("%" PRIu64 " matches, %" PRIu64 " misses\n", matches,
	    iters - matches);
	(void)clock_gettime(CLOCK, &finish);

	if (csv != NULL && fclose(csv) != 0) {
		fprintf(stderr, "Error closing %s: %s\n", csvf,
		    strerror(errno));
		exit(EXIT_FAILURE);
	}

	ns = tdiff(&start, &finish);
	printf("\nBenchmark wall clock time %.09f s\n", ns * 1e-9);

	AZ(getrusage(RUSAGE_SELF, &rusage));
	printf("user %.06f s, sys %.06f s, vcsw %ld, ivcsw %ld\n",
	    rusage.ru_utime.tv_sec + 1e-6 * rusage.ru_utime.tv_usec,
	    rusage.ru_stime.tv_sec + 1e-6 * rusage.ru_stime.tv_usec,
	    rusage.ru_nvcsw, rusage.ru_nivcsw);

	exit(EXIT_SUCCESS);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment