Commit 7fc3a4fe authored by Geoff Simmons's avatar Geoff Simmons

Remove the patricia interface.

parent 7f6a85f9
......@@ -7,8 +7,6 @@ vmod_LTLIBRARIES = libvmod_selector.la
libvmod_selector_la_SOURCES = \
vmod_selector.c \
patricia.h \
patricia.c \
qp.h \
qp.c \
popcnt_compat.h \
......@@ -26,11 +24,9 @@ dist_man_MANS = vmod_selector.3
@BUILD_VSC_SELECTOR@
vmod_selector.c patricia.c: patricia.h
vmod_selector.c qp.c: qp.h popcnt_compat.h
qp.c: qp.h popcnt_compat.h
ph.c: ph.h rnd.h
vmod_selector.c ph.c: ph.h rnd.h
vmod_selector.lo: $(nodist_libvmod_selector_la_SOURCES)
......
/*-
* Copyright (c) 2018 UPLEX Nils Goroll Systemoptimierung
* All rights reserved
*
* Author: Geoffrey Simmons <geoffrey.simmons@uplex.de>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* Inspired in part by Varnish hash_critbit.c and tarsnap's Patricia
* implementation.
*/
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include "vdef.h"
#include "vas.h"
#include "miniobj.h"
#include "patricia.h"
/*
* A trie consists of a tree of pt_y nodes and an array of strings,
* both of which are owned by a VMOD object. Both are passed in to the
* PT_* functions.
*
* idx is an index to the strings array, and the region of the string
* represented at a node is strings[idx][off] to strings[idx][off+len],
* inclusive.
*
* Terminating nulls form a part of the target string. So "foo\0" and
* "foobar\0" differ at the fourth byte, and the critical bit at that byte
* is the most significant 1-bit in 'b'.
*
* bitmask is set at the critical bit in the differing byte, and
* determines whether searches continue into leaf[0] or leaf[1].
*/
/* One node of the trie. */
struct pt_y {
/* Object magic, checked against PT_Y_MAGIC by the CHECK_OBJ* macros. */
unsigned magic;
#define PT_Y_MAGIC 0xfa564d14
/* Index into the strings array of the string this node refers to. */
unsigned idx;
/*
 * Children. During insert and lookup the child is chosen by whether
 * the bitmask bit is set in the next byte of the key (1) or not (0).
 */
struct pt_y *leaf[2];
/* Offset into strings[idx] at which this node's region starts. */
unsigned short off;
/* Number of bytes of strings[idx], starting at off, matched at this node. */
unsigned short len;
/* Single-bit mask for the critical bit; 0 until the node has been split. */
unsigned char bitmask;
};
/*
 * Table filled in by PT_Init(): for indices below 0x80 it holds the number
 * of leading zero bits of the index taken as a byte (8 for index 0);
 * entries 0x80..0xff keep their initial value 0. PT_Insert() indexes it
 * with the XOR of two differing bytes to find the shift for the critical
 * bit. Entry 0 doubles as the "initialized" flag read by PT_Inited().
 */
static unsigned char pt_bittbl[256] = { 0 };
/*
 * Populate pt_bittbl so that, for every byte value v below 0x80,
 * pt_bittbl[v] is the count of leading zero bits in v (8 when v is 0).
 * Values with the top bit set have no leading zeroes and keep the 0 from
 * the static initializer.
 *
 * Asserts that the table has not been populated before.
 */
void
PT_Init(void)
{
	unsigned v, rest;
	unsigned char zeroes;

	/* Entry 0 is only non-zero once this function has run. */
	AZ(pt_bittbl[0x34 ^ 0x34]);

	for (v = 0; v < 0x80; v++) {
		/* Every significant bit of v removes one leading zero. */
		zeroes = 8;
		for (rest = v; rest != 0; rest >>= 1)
			zeroes--;
		pt_bittbl[v] = zeroes;
	}

	/* Spot checks of the resulting table. */
	assert(pt_bittbl[0x34 ^ 0x34] == 8);
	AZ(pt_bittbl[0xaa ^ 0x55]);
	assert(pt_bittbl[0x01 ^ 0x22] == 2);
	assert(pt_bittbl[0x10 ^ 0x0b] == 3);
}
/*
 * Report whether PT_Init() has populated the bit table: returns 1 if so,
 * 0 otherwise. Entry 0 of the table is 0 from the static initializer and
 * is set to a non-zero value by PT_Init().
 */
int
PT_Inited(void)
{
	if (pt_bittbl[0x34 ^ 0x34] == 0)
		return (0);
	return (1);
}
/*
 * Allocate a node that refers to strings[idx], covering len bytes from
 * offset off. The new node has no children and a zero bitmask.
 *
 * Returns NULL with errno set to ERANGE if len does not fit in an
 * unsigned short, and NULL if ALLOC_OBJ leaves the pointer NULL.
 */
static struct pt_y *
y_alloc(unsigned idx, unsigned short off, size_t len)
{
	struct pt_y *node = NULL;

	if (len <= USHRT_MAX) {
		errno = 0;
		ALLOC_OBJ(node, PT_Y_MAGIC);
		if (node != NULL) {
			node->idx = idx;
			node->off = off;
			node->len = (unsigned short) len;
			/* A fresh node is expected to be otherwise zeroed. */
			AZ(node->leaf[0]);
			AZ(node->leaf[1]);
			AZ(node->bitmask);
		}
	}
	else
		errno = ERANGE;

	return (node);
}
/*
 * Allocate a leaf node for the remainder of string number idx, where b
 * points to the start of that string and c to the first byte that the
 * leaf should cover (c lies within the string starting at b).
 *
 * Node offsets and lengths are stored as unsigned short, and splitting a
 * node later produces offsets up to off + len. So the whole string must
 * be no longer than USHRT_MAX; otherwise the offset would be silently
 * truncated and the node would point at the wrong bytes.
 *
 * Returns NULL with errno set to ERANGE if the string is too long, or
 * whatever y_alloc() returns otherwise.
 */
static inline struct pt_y *
y_leaf_alloc(unsigned idx, unsigned char *c, unsigned char *b)
{
	size_t off, len;

	off = (size_t)(c - b);
	len = strlen((char *)c);
	if (off > USHRT_MAX || len > USHRT_MAX - off) {
		errno = ERANGE;
		return (NULL);
	}
	return (y_alloc(idx, (unsigned short)off, len));
}
/*
 * Create a node for the tail of y0's region: it refers to the same
 * string, starts len bytes further in, and takes over y0's children and
 * bitmask. y0 itself is not modified. len must be less than y0->len.
 *
 * Returns NULL if y_alloc() fails.
 */
static struct pt_y *
y_dup(struct pt_y *y0, unsigned short len)
{
	struct pt_y *tail;

	assert(len < y0->len);

	tail = y_alloc(y0->idx, y0->off + len, y0->len - len);
	if (tail != NULL) {
		tail->bitmask = y0->bitmask;
		tail->leaf[0] = y0->leaf[0];
		tail->leaf[1] = y0->leaf[1];
	}
	return (tail);
}
/*
 * Insert strings[idx] into the trie rooted at *root. If *root is NULL, a
 * root node covering the whole string is created and stored in *root.
 *
 * Returns 0 on success and -1 on failure. errno is EINVAL if the string
 * is already in the trie; y_alloc() sets ERANGE if a node length does
 * not fit in an unsigned short. -1 is also returned when a node cannot
 * be allocated.
 */
int
PT_Insert(struct pt_y * * restrict root, unsigned idx,
char * const restrict * const restrict strings)
{
struct pt_y *y;
unsigned char *c, *b;
AN(root);
CHECK_OBJ_ORNULL(*root, PT_Y_MAGIC);
AN(strings);
AN(strings[idx]);
/* Empty trie: the new string becomes the root. */
if (*root == NULL) {
*root = y_alloc(idx, 0, strlen(strings[idx]));
if (*root == NULL)
return (-1);
return (0);
}
y = *root;
/* b stays at the start of the new string; c is the scan cursor. */
b = (unsigned char *)strings[idx];
c = b;
errno = 0;
for (;;) {
unsigned short i;
unsigned char *s;
unsigned char bit;
struct pt_y *y_old, *y_new;
CHECK_OBJ_NOTNULL(y, PT_Y_MAGIC);
/* s is the start of the region represented by the current node. */
s = (unsigned char *)(strings[y->idx] + y->off);
/* Advance c over the bytes that agree with the node's region. */
for (i = 0; *c != '\0' && i < y->len && s[i] == *c; i++)
c++;
if (s[i] == '\0' && *c == '\0') {
/*
* The string to be inserted is already in the
* trie.
*/
assert(i == y->len);
errno = EINVAL;
return (-1);
}
if (i == y->len) {
/*
* The string to be inserted has a prefix that is
* already in the trie.
*/
/* Choose the child by the node's bitmask and the next byte. */
bit = (y->bitmask & *c) != 0;
assert(bit < 2);
if (y->leaf[bit] != NULL) {
y = y->leaf[bit];
continue;
}
/* Empty slot: hang the rest of the string there as a leaf. */
y_new = y_leaf_alloc(idx, c, b);
if (y_new == NULL)
return (-1);
y->leaf[bit] = y_new;
return (0);
}
/* Split the current node. */
y_new = y_leaf_alloc(idx, c, b);
if (y_new == NULL)
return (-1);
/* y_old takes over the unmatched tail of y and y's children. */
y_old = y_dup(y, i);
if (y_old == NULL) {
FREE_OBJ(y_new);
return (-1);
}
/* y now covers only the common part of the region. */
y->len = i;
/* The critical bit is the highest bit in which the two bytes differ. */
y->bitmask = 0x80 >> pt_bittbl[s[i] ^ *c];
assert((*c & y->bitmask) != (s[i] & y->bitmask));
bit = (*c & y->bitmask) != 0;
assert(bit < 2);
y->leaf[bit] = y_new;
y->leaf[1-bit] = y_old;
return (0);
}
}
/*
 * Exact-match lookup of subject in the trie rooted at root.
 *
 * Returns the index in strings of the string equal to subject, or
 * UINT_MAX if there is none (including when root is NULL).
 */
unsigned
PT_Lookup(const struct pt_y * const restrict root,
	  char * const restrict * const restrict strings,
	  const char * const restrict subject)
{
	const struct pt_y *node;
	size_t sublen, end;
	unsigned char side;

	AN(strings);
	AN(subject);
	if (root == NULL)
		return UINT_MAX;

	sublen = strlen(subject);
	for (node = root; node != NULL; node = node->leaf[side]) {
		CHECK_OBJ(node, PT_Y_MAGIC);

		end = node->off + node->len;
		if (end > sublen)
			return UINT_MAX;

		if (end == sublen && strings[node->idx][end] == '\0') {
			/*
			 * The walk only inspected critical bits, so the
			 * candidate has to be verified in full.
			 */
			if (strcmp(subject, strings[node->idx]) != 0)
				return (UINT_MAX);
			return node->idx;
		}

		/* Descend by the critical bit of the next subject byte. */
		side = (node->bitmask & subject[end]) != 0;
		assert(side < 2);
	}
	return UINT_MAX;
}
/*
 * Recursive helper for PT_Prefixes(): visit the subtree at y and record
 * in match the index of every string whose node chain ends within
 * subject (of length len) and whose node regions equal the
 * corresponding bytes of subject.
 *
 * Returns 0 normally, or -1 as soon as a match would have to be stored
 * while match->n already equals match->limit.
 */
static int
pt_search(const struct pt_y * const restrict y,
	  char * const restrict * const restrict strings,
	  const unsigned char * restrict subject, size_t len,
	  struct match_data * const restrict match)
{
	size_t end;
	unsigned side;

	if (y == NULL)
		return (0);
	CHECK_OBJ(y, PT_Y_MAGIC);

	/* A node reaching beyond the subject cannot be a prefix of it. */
	end = y->off + y->len;
	if (end > len)
		return (0);
	if (y->len > 0 &&
	    memcmp(subject + y->off, strings[y->idx] + y->off, y->len) != 0)
		return (0);

	if (strings[y->idx][end] == '\0') {
		/* The node's string ends here, so it is a prefix. */
		if (match->n == match->limit)
			return (-1);
		match->indices[match->n] = y->idx;
		match->n++;
		if (y->idx < match->min)
			match->min = y->idx;
		if (y->idx > match->max)
			match->max = y->idx;
		if (end == len) {
			/* The prefix is the entire subject. */
			match->exact = y->idx;
			return (0);
		}
	}

	for (side = 0; side < 2; side++)
		if (pt_search(y->leaf[side], strings, subject, len, match)
		    != 0)
			return (-1);
	return (0);
}
/*
 * Find the strings in the trie that are prefixes of subject. match is
 * reset (n = 0, min = UINT_MAX, max = 0, exact = UINT_MAX) and then
 * filled in by pt_search(); match->indices and match->limit must have
 * been set up by the caller.
 *
 * Returns the result of pt_search(): 0, or -1 if match->limit was
 * reached.
 */
int
PT_Prefixes(const struct pt_y * const restrict root,
	    char * const restrict * const restrict strings,
	    const char * const restrict subject,
	    struct match_data * const restrict match)
{
	CHECK_OBJ_NOTNULL(match, MATCH_DATA_MAGIC);
	AN(match->indices);
	AN(match->limit);
	AN(strings);
	AN(subject);

	match->exact = UINT_MAX;
	match->min = UINT_MAX;
	match->max = 0;
	match->n = 0;

	return (pt_search(root, strings, (unsigned char *)subject,
			  strlen(subject), match));
}
/*
 * Free the node y and, recursively, everything below it. A NULL
 * argument is accepted and ignored.
 */
void
PT_Free(struct pt_y *y)
{
	int side;

	if (y != NULL) {
		CHECK_OBJ(y, PT_Y_MAGIC);
		/* Children first, then the node itself. */
		for (side = 0; side < 2; side++)
			PT_Free(y->leaf[side]);
		FREE_OBJ(y);
	}
}
/*
 * Append a textual description of node y, followed by its subtrees
 * (leaf[0] before leaf[1]), to sb. strings is the string array that the
 * node indices refer to.
 *
 * Pointers are cast to void * for the %p conversion, which is the only
 * argument type %p is defined for.
 */
static void
pt_print_tree(struct pt_y *y, struct vsb *sb, char **strings)
{
	CHECK_OBJ_NOTNULL(y, PT_Y_MAGIC);
	CHECK_OBJ_NOTNULL(sb, VSB_MAGIC);

	VSB_printf(sb, "node = %p\n", (void *)y);
	VSB_printf(sb, "leaf[0] = %p\n", (void *)y->leaf[0]);
	VSB_printf(sb, "leaf[1] = %p\n", (void *)y->leaf[1]);
	VSB_printf(sb, "idx = %u\n", y->idx);
	VSB_printf(sb, "off = %u\n", y->off);
	VSB_printf(sb, "len = %u\n", y->len);
	AN(strings[y->idx]);
	VSB_printf(sb, "strings[idx] = %s\n", strings[y->idx]);
	/* The part of the string consumed before this node ... */
	VSB_printf(sb, "strings[idx][0]..[off] = %.*s\n", y->off,
		   strings[y->idx]);
	/* ... and the part represented by the node itself. */
	VSB_printf(sb, "strings[idx][off]..[off+len] = %.*s\n", y->len,
		   strings[y->idx] + y->off);
	VSB_printf(sb, "bitmask = 0x%02x\n\n", y->bitmask);

	for (int side = 0; side < 2; side++)
		if (y->leaf[side] != NULL)
			pt_print_tree(y->leaf[side], sb, strings);
}
/*
 * Render the trie rooted at root as text. strings is the string array
 * that the node indices refer to; it is only required when root is not
 * NULL.
 *
 * Returns a vsb obtained from VSB_new_auto() on which VSB_finish() has
 * been called. The result of VSB_new_auto() is asserted to be non-NULL
 * before it is used.
 */
struct vsb *
PT_Dump(struct pt_y *root, char **strings)
{
	struct vsb *sb = VSB_new_auto();

	AN(sb);
	/* %p is only defined for void * arguments. */
	VSB_printf(sb, "root = %p\n\n", (void *)root);
	if (root != NULL) {
		AN(strings);
		pt_print_tree(root, sb, strings);
	}
	VSB_finish(sb);
	return (sb);
}
/*
 * Recursive helper for PT_Stats(): account for node y and its subtree
 * in stats. depth is the depth of y's parent (0 for the root), so y
 * itself is counted at depth + 1.
 *
 * A node is "terminal" if the string it refers to ends at the end of
 * its region; minimum, maximum and running mean depth are tracked for
 * terminal nodes. A node without children counts as a leaf.
 */
void
pt_stats(const struct pt_y * const restrict y,
	 char * const restrict * const restrict strings,
	 struct pt_stats * const restrict stats, unsigned depth)
{
	int terminal, childless;

	if (y == NULL)
		return;
	CHECK_OBJ(y, PT_Y_MAGIC);

	depth++;
	stats->nodes++;

	terminal = (strings[y->idx][y->off + y->len] == '\0');
	if (terminal) {
		if (depth < stats->dmin)
			stats->dmin = depth;
		if (depth > stats->dmax)
			stats->dmax = depth;
		/* Incremental update of the mean over terminal nodes. */
		stats->davg += (depth - stats->davg) / (stats->terms + 1.);
		stats->terms++;
	}

	childless = (y->leaf[0] == NULL && y->leaf[1] == NULL);
	if (childless) {
		stats->leaves++;
		return;
	}

	pt_stats(y->leaf[0], strings, stats, depth);
	pt_stats(y->leaf[1], strings, stats, depth);
}
/*
 * Compute statistics about the trie rooted at root and store them in
 * stats. All counters are reset first; nodesz is set to the size of one
 * node. For an empty trie dmin stays UINT64_MAX and everything else 0.
 */
void
PT_Stats(const struct pt_y * const restrict root,
	 char * const restrict * const restrict strings,
	 struct pt_stats * const restrict stats)
{
	CHECK_OBJ_NOTNULL(stats, PT_STATS_MAGIC);

	/* Fixed property of the implementation. */
	stats->nodesz = sizeof(*root);

	/* Counters accumulated by the traversal. */
	stats->nodes = 0;
	stats->leaves = 0;
	stats->terms = 0;

	/* Depth figures for terminal nodes. */
	stats->dmin = UINT64_MAX;
	stats->dmax = 0;
	stats->davg = 0.;

	pt_stats(root, strings, stats, 0);
}
/*-
* Copyright (c) 2018 UPLEX Nils Goroll Systemoptimierung
* All rights reserved
*
* Author: Geoffrey Simmons <geoffrey.simmons@uplex.de>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <errno.h>
#include <limits.h>
#include <unistd.h>
#include "vsb.h"
struct pt_y;
/*
 * Result of a prefix search with PT_Prefixes(). The caller sets magic,
 * indices and limit; PT_Prefixes() resets and fills the other fields.
 */
struct match_data {
/* Object magic, must be MATCH_DATA_MAGIC. */
unsigned magic;
#define MATCH_DATA_MAGIC 0x0d9a845e
/* Array that receives the indices of the matching strings. */
unsigned *indices;
/* Maximum number of matches; the search fails when it is reached. */
unsigned limit;
/* Number of entries stored in indices. */
unsigned n;
/* Index of the string equal to the whole subject, or UINT_MAX. */
unsigned exact;
/* Smallest index in indices; UINT_MAX if there were no matches. */
unsigned min;
/* Largest index in indices; 0 if there were no matches. */
unsigned max;
};
/*
 * Statistics about a trie, filled in by PT_Stats(). The caller sets
 * magic.
 */
struct pt_stats {
/* Object magic, must be PT_STATS_MAGIC. */
unsigned magic;
#define PT_STATS_MAGIC 0xf1c1114e
/* Total number of nodes. */
uint64_t nodes;
/* Number of nodes without children. */
uint64_t leaves;
/* Number of nodes at which a string in the set ends. */
uint64_t terms;
/* Size of one node in bytes. */
uint64_t nodesz;
/* Minimum depth of a terminating node (root has depth 1). */
uint64_t dmin;
/* Maximum depth of a terminating node. */
uint64_t dmax;
/* Mean depth of the terminating nodes. */
double davg;
};
/*
 * Set up the internal bit table; asserts that this has not been done
 * before.
 */
void PT_Init(void);
/* Non-zero iff PT_Init() has been called. */
int PT_Inited(void);
/*
 * Insert strings[idx] into the trie at *root (created if *root is NULL).
 * Returns 0 on success, -1 on failure; errno is EINVAL if the string is
 * already in the trie.
 */
int PT_Insert(struct pt_y * * restrict root, unsigned idx,
char * const restrict * const restrict strings);
/*
 * Return the index of the string in the trie equal to subject, or
 * UINT_MAX if there is none.
 */
unsigned PT_Lookup(const struct pt_y * const restrict root,
char * const restrict * const restrict strings,
const char * const restrict subject);
/*
 * Store in match the indices of the strings in the trie that are
 * prefixes of subject. Returns 0, or -1 if match->limit was reached.
 */
int PT_Prefixes(const struct pt_y * const restrict root,
char * const restrict * const restrict strings,
const char * const restrict subject,
struct match_data * const restrict match);
/* Reset stats and fill it with statistics about the trie at root. */
void PT_Stats(const struct pt_y * const restrict root,
char * const restrict * const restrict strings,
struct pt_stats * const restrict stats);
/* Free the node y and all nodes below it; NULL is ignored. */
void PT_Free(struct pt_y *y);
/* Return a finished vsb containing a textual dump of the trie at root. */
struct vsb * PT_Dump(struct pt_y *root, char **strings);
......@@ -4,11 +4,7 @@ AM_CFLAGS = $(VARNISHAPI_CFLAGS) -I$(top_srcdir)/src -Wall -Werror -Wextra \
-std=c99
AM_LDFLAGS = $(VARNISHAPI_LIBS)
bin_PROGRAMS = bench bench_qp bench_ph
bench_SOURCES = bench.c
bench_LDADD = $(top_builddir)/src/.libs/patricia.o
bin_PROGRAMS = bench_qp bench_ph
bench_qp_SOURCES = bench_qp.c
......
/*-
* Copyright (c) 2020 UPLEX Nils Goroll Systemoptimierung
* All rights reserved
*
* Author: Geoffrey Simmons <geoffrey.simmons@uplex.de>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/* for strdup() and getline() */
#define _POSIX_C_SOURCE 200809L
/* for srand48() and drand48() */
#define _XOPEN_SOURCE
#include "config.h"
#include <stdio.h>
#include <errno.h>
#include <limits.h>
#include <stdint.h>
#include <time.h>
#include <stdlib.h>
#include <string.h>
#include <sys/resource.h>
#include "vdef.h"
#include "vas.h"
#include "miniobj.h"
#include "vsb.h"
#include "patricia.h"
/* Nanoseconds per second. */
#define BILLION (1000 * 1000 * 1000)
/* Default number of benchmark iterations; overridden by -n. */
#define ITERATIONS (1000)
/* Clock used for the timing measurements. */
#define CLOCK (CLOCK_MONOTONIC)
/* Root of the trie under test; freed by free_on_exit(). */
static struct pt_y *origo = NULL;
/*
 * Return the time from *before to *after in nanoseconds. *after is
 * expected not to be earlier than *before.
 */
static uint64_t
tdiff(struct timespec *before, struct timespec *after)
{
	uint64_t elapsed;
	time_t secs = after->tv_sec - before->tv_sec;
	long nsecs = after->tv_nsec - before->tv_nsec;

	if (nsecs < 0) {
		/* Borrow one second for the nanosecond part. */
		secs -= 1;
		nsecs += BILLION;
	}
	elapsed = secs * BILLION;
	elapsed += nsecs;
	return elapsed;
}
/*
 * Return lo plus the product of drand48() and (hi - lo), truncated to
 * int.
 */
static int
rnd(int lo, int hi)
{
	const int span = hi - lo;
	const int step = (int)(drand48() * span);

	return (step + lo);
}
/*
 * Shuffle the n elements of index in place: each position i in turn is
 * swapped with a position chosen by rnd(i, n).
 *
 * Arrays with fewer than two elements are left alone. Without that
 * check, n - 1 would wrap around for n == 0 (n is unsigned) and the loop
 * would run far beyond the array.
 */
static inline void
shuffle(unsigned *index, size_t n)
{
	if (n < 2)
		return;

	for (size_t i = 0; i < n - 1; i++) {
		int j = rnd((int)i, (int)n);
		/* Same type as the elements, so no value can be altered. */
		unsigned temp = index[i];

		index[i] = index[j];
		index[j] = temp;
	}
}
/*
 * Free the trie rooted at origo. main() registers this function with
 * atexit().
 */
static void
free_on_exit(void)
{
PT_Free(origo);
}
/*
 * Write the usage message to stderr and terminate the program with the
 * given exit status. argv is the program name to show in the message.
 */
void
usage(const char *argv, int status)
{
	fprintf(stderr,
		"Usage: %s [-hs] [-c csvfile] [-d dumpfile] "
		"[-i inputfile]\n"
		" [-m m|p] [-n iterations] [file]\n",
		argv);
	exit(status);
}
/*
 * Benchmark driver for the patricia trie.
 *
 * Reads the string set from the file argument (or stdin), one string per
 * line, builds a trie from it, prints statistics, and then times
 * PT_Lookup() and/or PT_Prefixes() for a set of input strings.
 *
 * Options:
 *   -c csvfile    write one CSV line per operation
 *   -d dumpfile   write a dump of the trie
 *   -h            print usage and exit
 *   -i inputfile  read the input strings from this file; without it, or
 *                 if it is empty, the string set itself is used
 *   -m m|p        run only the match (m) or only the prefix (p) benchmark
 *   -n iterations number of passes over the inputs (0: stop after stats);
 *                 must be a non-negative decimal number
 *   -s            shuffle the inputs before each pass
 *
 * Every failure terminates the program with EXIT_FAILURE.
 */
int
main(int argc, char *argv[])
{
	FILE *stringsf = stdin, *csv = NULL;
	char **strings = NULL, **inputs = NULL, *line, *inputf = NULL,
		*csvf = NULL, *dumpf = NULL;
	size_t lineln = LINE_MAX;
	ssize_t readln;
	unsigned n = 0, ninputs = 0, *index;
	struct timespec before, after, start, finish;
	uint64_t ns = 0, iters, matches, exacts, misses;
	struct pt_stats stats = { .magic = PT_STATS_MAGIC };
	int opt, do_shuf = 0, do_iters = ITERATIONS, do_match = 1, do_prefix = 1;
	struct rusage rusage;

	while ((opt = getopt(argc, argv, "hsc:d:i:m:n:")) != -1) {
		switch (opt) {
		case 'c':
			csvf = optarg;
			break;
		case 'd':
			dumpf = optarg;
			break;
		case 'h':
			usage(argv[0], EXIT_SUCCESS);
			break;
		case 'i':
			inputf = optarg;
			break;
		case 'm':
			if (strcmp(optarg, "m") == 0)
				do_prefix = 0;
			else if (strcmp(optarg, "p") == 0)
				do_match = 0;
			else
				usage(argv[0], EXIT_FAILURE);
			break;
		case 'n': {
			char *end;
			long val;

			/*
			 * A negative count would skip the benchmark loops
			 * and lead to a division by zero in the summary, so
			 * the argument is validated here.
			 */
			errno = 0;
			val = strtol(optarg, &end, 10);
			if (errno != 0 || end == optarg || *end != '\0'
			    || val < 0 || val > INT_MAX)
				usage(argv[0], EXIT_FAILURE);
			do_iters = (int)val;
			break;
		}
		case 's':
			do_shuf = 1;
			break;
		default:
			usage(argv[0], EXIT_FAILURE);
		}
	}

	if (argc - optind > 1)
		usage(argv[0], EXIT_FAILURE);
	if (argc == optind + 1 && strcmp(argv[optind], "-") != 0) {
		errno = 0;
		if ((stringsf = fopen(argv[optind], "r")) == NULL) {
			fprintf(stderr, "Cannot open %s: %s\n", argv[optind],
				strerror(errno));
			exit(EXIT_FAILURE);
		}
	}

	/* Read the string set, one string per line. */
	line = malloc(lineln);
	AN(line);
	errno = 0;
	while ((readln = getline(&line, &lineln, stringsf)) != -1) {
		/*
		 * Strip the newline only if there is one; the last line
		 * of a file may come without it.
		 */
		if (readln > 0 && line[readln - 1] == '\n')
			line[--readln] = '\0';
		if (readln > USHRT_MAX) {
			fprintf(stderr, "String length %zd too long (max %d)\n",
				readln, USHRT_MAX);
			exit(EXIT_FAILURE);
		}
		n++;
		strings = realloc(strings, n * sizeof(char *));
		AN(strings);
		strings[n - 1] = strdup(line);
		AN(strings[n - 1]);
	}
	if (ferror(stringsf)) {
		fprintf(stderr, "Error reading %s: %s\n",
			stringsf == stdin ? "stdin" : argv[optind],
			strerror(errno));
		exit(EXIT_FAILURE);
	}
	errno = 0;
	if (stringsf != stdin && fclose(stringsf) != 0) {
		fprintf(stderr, "Error closing %s: %s\n", argv[optind],
			strerror(errno));
		exit(EXIT_FAILURE);
	}
	printf("%u strings read\n", n);
	if (n == 0)
		exit(EXIT_SUCCESS);

	/* Read the optional separate set of input strings. */
	if (inputf != NULL) {
		FILE *inf;

		errno = 0;
		if ((inf = fopen(inputf, "r")) == NULL) {
			fprintf(stderr, "Cannot open %s: %s\n", inputf,
				strerror(errno));
			exit(EXIT_FAILURE);
		}
		while ((readln = getline(&line, &lineln, inf)) != -1) {
			if (readln > 0 && line[readln - 1] == '\n')
				line[--readln] = '\0';
			if (readln > USHRT_MAX) {
				fprintf(stderr, "%s: string length %zd "
					"too long (max %d)\n", inputf,
					readln, USHRT_MAX);
				exit(EXIT_FAILURE);
			}
			ninputs++;
			inputs = realloc(inputs, ninputs * sizeof(char *));
			AN(inputs);
			inputs[ninputs - 1] = strdup(line);
			AN(inputs[ninputs - 1]);
		}
		if (ferror(inf)) {
			fprintf(stderr, "Error reading %s: %s\n", inputf,
				strerror(errno));
			exit(EXIT_FAILURE);
		}
		errno = 0;
		if (fclose(inf) != 0) {
			fprintf(stderr, "Error closing %s: %s\n", inputf,
				strerror(errno));
			exit(EXIT_FAILURE);
		}
		printf("%u input strings read\n", ninputs);
	}
	if (inputf == NULL || ninputs == 0) {
		printf("Using string set as inputs\n");
		inputs = strings;
		ninputs = n;
	}
	free(line);

	if (csvf != NULL) {
		errno = 0;
		if ((csv = fopen(csvf, "w")) == NULL) {
			fprintf(stderr, "Cannot open %s: %s\n", csvf,
				strerror(errno));
			exit(EXIT_FAILURE);
		}
		fprintf(csv, "type,matches,exact,t\n");
	}

	/* Seed the random number generator from the wall clock. */
	AZ(clock_gettime(CLOCK_REALTIME, &before));
	srand48(before.tv_nsec);
	AZ(clock_getres(CLOCK, &before));
	printf("Clock resolution %ld ns\n",
	       before.tv_sec * BILLION + before.tv_nsec);

	printf("\nInitializing ...\n");
	AZ(PT_Inited());
	PT_Init();
	AN(PT_Inited());

	printf("Building trie ...\n");
	for (unsigned i = 0; i < n; i++) {
		int ret;

		errno = 0;
		(void)clock_gettime(CLOCK, &before);
		ret = PT_Insert(&origo, i, strings);
		(void)clock_gettime(CLOCK, &after);
		if (ret != 0) {
			fprintf(stderr, "PT_Insert() failed: %s\n",
				strerror(errno));
			exit(EXIT_FAILURE);
		}
		ns += tdiff(&before, &after);
	}
	AZ(atexit(free_on_exit));
	printf("... complete.\n");
	printf("%u strings inserted in %.9f s, mean %lu ns/insert\n", n,
	       ns * 1e-9, ns / n);

	if (dumpf != NULL) {
		FILE *df;
		struct vsb *vsb;

		printf("\nDumping trie to %s ...\n", dumpf);
		vsb = PT_Dump(origo, strings);
		CHECK_OBJ_NOTNULL(vsb, VSB_MAGIC);
		errno = 0;
		if ((df = fopen(dumpf, "w")) == NULL) {
			fprintf(stderr, "Cannot open %s: %s\n", dumpf,
				strerror(errno));
			exit(EXIT_FAILURE);
		}
		if (fwrite(VSB_data(vsb), 1, VSB_len(vsb), df)
		    != (unsigned)VSB_len(vsb)) {
			fprintf(stderr, "Error writing to %s: %s\n", dumpf,
				strerror(errno));
			exit(EXIT_FAILURE);
		}
		VSB_destroy(&vsb);
		if (fclose(df) != 0) {
			fprintf(stderr, "Error closing %s: %s\n", dumpf,
				strerror(errno));
			exit(EXIT_FAILURE);
		}
		printf("... done.\n");
	}

	printf("\nGetting stats ...\n");
	(void)clock_gettime(CLOCK, &before);
	PT_Stats(origo, strings, &stats);
	(void)clock_gettime(CLOCK, &after);
	printf("Stats computed in %lu ns\n", tdiff(&before, &after));
	printf("%lu nodes\n", stats.nodes);
	printf("%lu leaves\n", stats.leaves);
	printf("%lu terminal nodes\n", stats.terms);
	printf("%lu bytes node size\n", stats.nodesz);
	printf("%lu min terminating node depth\n", stats.dmin);
	printf("%lu max terminating node depth\n", stats.dmax);
	printf("%.1f mean terminating node depth\n", stats.davg);

	if (do_iters == 0)
		exit(EXIT_SUCCESS);

	/*
	 * Order in which the inputs are used. Allocated on the heap, since
	 * the number of inputs is not bounded.
	 */
	index = malloc(ninputs * sizeof(*index));
	AN(index);
	for (unsigned i = 0; i < ninputs; i++)
		index[i] = i;

	printf("\nBenchmarking with %d iterations of %u input strings\n",
	       do_iters, ninputs);
	if (do_shuf)
		printf("Shuffling inputs on each iteration\n");

	/*
	 * Both timestamps get a defined value here, so that the wall clock
	 * time reported at the end is valid whichever benchmarks run.
	 */
	(void)clock_gettime(CLOCK, &start);
	finish = start;

	if (do_match) {
		printf("\nBenchmarking matches ...\n");
		ns = 0;
		iters = 0;
		matches = 0;
		for (int iter = 0; iter < do_iters; iter++) {
			if (do_shuf)
				shuffle(index, ninputs);
			for (unsigned i = 0; i < ninputs; i++) {
				unsigned idx, stridx = index[i];
				uint64_t diff;

				(void)clock_gettime(CLOCK, &before);
				idx = PT_Lookup(origo, strings, inputs[stridx]);
				(void)clock_gettime(CLOCK, &after);

				if (idx != UINT_MAX) {
					AZ(strcmp(strings[idx],
						  inputs[stridx]));
					matches++;
				}
				diff = tdiff(&before, &after);
				ns += diff;
				iters++;
				if (csv != NULL) {
					if (idx != UINT_MAX)
						fprintf(csv,
							"match,1,1,%lu\n",
							diff);
					else
						fprintf(csv,
							"match,0,0,%lu\n",
							diff);
				}
			}
		}
		(void)clock_gettime(CLOCK, &finish);
		assert(matches <= iters);
		printf("... complete.\n");
		printf("%ld match operations in %.09f s, mean %lu ns/op\n",
		       iters, ns * 1e-9, ns / iters);
		printf("%ld matches, %ld misses\n", matches, iters - matches);
	}

	if (do_prefix) {
		unsigned *indices;

		printf("\nBenchmarking prefix matches ...\n");
		indices = malloc(ninputs * sizeof(*indices));
		AN(indices);
		struct match_data match = {
			.magic = MATCH_DATA_MAGIC,
			.indices = indices,
			.limit = ninputs,
		};

		ns = 0;
		iters = 0;
		matches = 0;
		misses = 0;
		exacts = 0;
		for (int iter = 0; iter < do_iters; iter++) {
			if (do_shuf)
				shuffle(index, ninputs);
			for (unsigned i = 0; i < ninputs; i++) {
				unsigned stridx = index[i];
				int ret;
				uint64_t diff;

				(void)clock_gettime(CLOCK, &before);
				ret = PT_Prefixes(origo, strings,
						  inputs[stridx], &match);
				(void)clock_gettime(CLOCK, &after);

				AZ(ret);
				if (match.n > 0) {
					assert(match.min <= match.max);
					matches += match.n;
				}
				else
					misses++;
				/* Check each reported prefix. */
				for (unsigned j = 0; j < match.n; j++) {
					assert(match.indices[j] >= match.min);
					assert(match.indices[j] <= match.max);
					AN(strstr(inputs[stridx],
						  strings[match.indices[j]]));
				}
				if (match.exact != UINT_MAX) {
					AZ(strcmp(inputs[stridx],
						  strings[match.exact]));
					exacts++;
				}
				diff = tdiff(&before, &after);
				ns += diff;
				iters++;
				if (csv != NULL) {
					if (match.n > 0)
						fprintf(csv,
							"prefix,%u,%d,%lu\n",
							match.n,
							match.exact != UINT_MAX,
							diff);
					else
						fprintf(csv,
							"prefix,0,0,%lu\n",
							diff);
				}
			}
		}
		(void)clock_gettime(CLOCK, &finish);
		assert(matches >= exacts);
		printf("... complete.\n");
		printf("%ld prefix match operations in %.09f s, "
		       "mean %lu ns/op\n", iters, ns * 1e-9, ns / iters);
		printf("%ld prefixes found, %ld exact matches, %ld misses\n",
		       matches, exacts, misses);
		free(indices);
	}
	free(index);

	if (csv != NULL && fclose(csv) != 0) {
		fprintf(stderr, "Error closing %s: %s\n", csvf,
			strerror(errno));
		exit(EXIT_FAILURE);
	}

	ns = tdiff(&start, &finish);
	printf("\nBenchmark wall clock time %.09f s\n", ns * 1e-9);

	AZ(getrusage(RUSAGE_SELF, &rusage));
	printf("user %.06f s, sys %.06f s, vcsw %ld, ivcsw %ld\n",
	       rusage.ru_utime.tv_sec + 1e-6 * rusage.ru_utime.tv_usec,
	       rusage.ru_stime.tv_sec + 1e-6 * rusage.ru_stime.tv_usec,
	       rusage.ru_nvcsw, rusage.ru_nivcsw);

	exit(EXIT_SUCCESS);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment