Commit 1f7815f1 authored by Geoff Simmons

Implement perfect hashing based on universal hashing.

Universal hashing has a sounder theoretical basis; in particular, it
doesn't have the dubious minimum hash table size below which a
perfect hash may not be possible, and which was set by trial and error.

For nearly all test data, universal hashing performs at least as
well, and often better. It is especially better for sets with longer
strings, since the subject string is cast as an array of uint32_t, so
the hash is computed in fewer operations.

The only exception I've noticed is /usr/share/dict/words, which now
appears to have more collisions than under the previous approach.
But it appears likely that this only becomes an issue for sets that
are much larger than are probable for VCL use cases (in the 100,000
range), and only if all of the set's elements are tested for matches
about equally often (whereas real-world usage patterns tend to
match a subset much more frequently).
parent b956c988
......@@ -13,7 +13,8 @@ libvmod_selector_la_SOURCES = \
qp.c \
popcnt_compat.h \
ph.h \
ph.c
ph.c \
rnd.h
nodist_libvmod_selector_la_SOURCES = \
vcc_if.c \
......@@ -29,7 +30,7 @@ vmod_selector.c patricia.c: patricia.h
qp.c: qp.h popcnt_compat.h
ph.c: ph.h
ph.c: ph.h rnd.h
vmod_selector.lo: $(nodist_libvmod_selector_la_SOURCES)
......
......@@ -26,57 +26,94 @@
* SUCH DAMAGE.
*/
#include <stdint.h>
/*
* Perfect hashing based on universal hashing. See:
* - Fredman, Komlos & Szemeredi (1984), JACM 31 (3)
* - Thorup (2020), arxiv.org 1504.06804
* and the Wikipedia articles "Universal Hashing" and "Perfect hash
* function".
*/
#include <stdlib.h>
#include <string.h>
#include "vdef.h"
#include "vas.h"
#include "miniobj.h"
#include "vbm.h"
#include "ph.h"
#include "rnd.h"
/*
 * Exponents p of the first 7 Mersenne primes (2^p - 1), in ascending
 * order. This is a read-only lookup table, hence const.
 */
static const unsigned lg_mersenne[] = {
	2, 3, 5, 7, 13, 17, 19,
};
/*
 * Number of elements in array a. Only valid for true arrays, not for
 * pointers. The argument is parenthesized so that expressions such as
 * *m bind as intended.
 */
#define LEN(a) (sizeof(a) / sizeof((a)[0]))
/* Last element of array a. */
#define LAST(a) ((a)[LEN(a) - 1])
/*
* There is a non-zero probability that no perfect hash can be found,
* infinitesimal for moderate to large sets, but not impossible for small
* sets, if the tables are about as small as the set. Tests have never
* shown hash generation failure when the tables have this minimum size.
* Limit to MAXN strings in a set, so that we can round up the size of the
* hash table to a power of 2, but can use UINT_MAX to signify "no match".
*
* MAX_BUCKETN is the largest N such that N^2 < MAXN.
*/
#define MINBITS (8)
#define MAX_LG (31)
#define MAXN (1U << MAX_LG)
#define MAX_BUCKETN (46340)
/*
 * Parameters of one universal hash function, as evaluated by hash(). The
 * same structure is used for the first-level hash (ph->h1) and for the
 * second-level hashes that PH_Generate() creates for first-level slots
 * holding more than one string.
 */
struct hash {
unsigned magic;
#define HASH_MAGIC 0x11a887ce
/* Table size - 1; hash() ANDs its result with this mask. */
uint32_t mask;
/* Array of random 64-bit keys, set from rnd64() | 1 in PH_Generate(). */
uint64_t *k;
/* Random 64-bit starting value of the sum computed by hash(). */
uint64_t addend;
/*
 * Second-level hashes only: table of indices into the strings array,
 * in which UINT_MAX marks an unused entry. Left NULL in the first-level
 * hash, whose table lives in struct ph.
 */
uint32_t *tbl;
};
/*
 * One slot of the first-level table. Which member is valid is recorded
 * outside the union, in the collision bitmap of struct ph: if the bit for
 * the slot is set, h2 points to the slot's second-level hash; otherwise
 * idx is the index of the slot's only string, or UINT_MAX if the slot is
 * empty.
 */
union tbl_t {
struct hash *h2;
uint32_t idx;
};
struct ph {
unsigned magic;
#define PH_MAGIC 0x00cd8c1d
int32_t *inter;
unsigned *tbl;
struct hash *h1;
union tbl_t *tbl;
struct vbitmap *collision;
size_t l;
size_t minlen;
size_t maxlen;
unsigned mask;
unsigned bits;
};
struct bucket {
unsigned *idx;
uint32_t *idx;
int n;
};
#define FNV64_OFFSET_BASIS (0xcbf29ce484222325)
#define FNV64_PRIME (0x00000100000001b3)
/* FNV-1a 64-bit with xor-folding */
/*
* Multilinear-HM from Lemire & Kaser (2018), except that we add the tail
* part. arxiv.org 1202.4961
*/
static inline uint32_t
hash(uint32_t h, const char *s, size_t len, unsigned bits, unsigned mask)
hash(const struct hash * const restrict hash,
const char * const restrict subject, size_t len)
{
uint64_t h64 = h;
uint64_t h = hash->addend;
const uint64_t *k = hash->k;
const uint32_t *s = (const uint32_t *)subject, *e;
uint32_t tail[2] = {0};
size_t l = (len / 8) * 2;
if (h64 == 0)
h64 = FNV64_OFFSET_BASIS;
while (len--) {
h64 ^= (uint64_t)*s++;
h64 *= FNV64_PRIME;
for (e = s + l; s < e; k += 2, s += 2)
h += (*s + *k) * (*(s+1) + *(k+1));
}
return (((h64 >> bits) ^ h64) & mask);
memcpy(tail, e, (void *)(subject + len) - (void *)e);
h += (*tail + *k) * (*(tail+1) + *(k+1));
return ((h >> 32) & hash->mask);
}
/*
......@@ -92,41 +129,70 @@ lg(unsigned n)
}
/*
* Sort buckets by length in *descending* order, which is the opposite of
* the usual sense of qsort(). So:
*
* b1 > b2 -> return < 0
* b1 < b2 -> return > 0;
* For set sizes <= 2^19, the hash table size is 2^N, where N is the
* exponent of the next highest Mersenne prime, so that the bitmasking is
* equivalent to mod prime. That probably covers every realistic use case
* for VCL. But should someone use a set larger than 2^19, we take the
* next highest power of 2.
*/
static int
len_desc_cmp(const void *b1, const void *b2)
static uint32_t
getsz(unsigned n)
{
AN(b1);
AN(b2);
unsigned bits = lg(n);
for (unsigned i = 0; i < LEN(lg_mersenne); i++) {
if (bits < lg_mersenne[i]) {
bits = lg_mersenne[i];
if (i > 0 && n == (1U << lg_mersenne[i - 1]))
bits = lg_mersenne[i - 1];
break;
}
}
if (n > (1U << LAST(lg_mersenne)) && n != (unsigned)(1U << bits))
bits++;
assert(bits <= MAX_LG);
return (1U << bits);
}
/*
 * Return a 64-bit random value assembled from two successive 32-bit
 * outputs of rnd_nxt(): the first call supplies the low word, the second
 * call the high word.
 *
 * The two calls are made in separate statements because the order in
 * which the operands of + are evaluated is unspecified in C. With both
 * calls in one expression, the value produced for a given seed could
 * differ between compilers.
 */
static uint64_t
rnd64(void)
{
	uint64_t lo = rnd_nxt();
	uint64_t hi = rnd_nxt();

	return (lo + (hi << 32));
}
return ( ((struct bucket *)b2)->n - ((struct bucket *)b1)->n);
/*
 * Initialize perfect hashing by passing the four 32-bit seed words to
 * rnd_init(), which sets the state of the random number generator from
 * which the hash keys are drawn.
 */
void
PH_Init(uint32_t seed[4])
{
rnd_init(seed);
}
struct ph *
PH_Generate(char * const * const strings, unsigned n)
{
size_t min = SIZE_MAX, max = 0, len;
int *found = NULL;
size_t min = SIZE_MAX, max = 0, l;
struct hash *h1 = NULL;
union tbl_t *tbl = NULL;
struct vbitmap *collision = NULL;
struct bucket *buckets = NULL;
int32_t *inter = NULL;
unsigned bits, sz, mask, i, *tbl = NULL;
uint32_t sz;
struct ph *ph = NULL;
AN(strings);
assert(n > 0 && n < 1 << 30);
if (n == 0 || n > MAXN) {
errno = ERANGE;
return (NULL);
}
bits = lg(n);
if (n != (unsigned)(1 << bits))
bits++;
if (bits < MINBITS)
bits = MINBITS;
sz = 1 << bits;
mask = sz - 1;
sz = getsz(n);
for (unsigned i = 0; i < n; i++) {
size_t len = strlen(strings[i]);
if (len < min)
min = len;
if (len > max)
max = len;
}
/*
 * Number of 64-bit keys that hash() needs for the longest string: two
 * for every complete 8-byte group, plus two that hash() always consumes
 * for the zero-padded tail, even when the tail is empty. Rounding up
 * with (max + 7) / 8 is two keys short whenever max is a multiple of 8
 * (including 0), which lets hash() read past the end of the key array.
 */
l = (max / 8 + 1) * 2;
errno = 0;
buckets = calloc(sz, sizeof(*buckets));
......@@ -136,18 +202,29 @@ PH_Generate(char * const * const strings, unsigned n)
if (tbl == NULL)
goto exit;
memset(tbl, 0xff, sz * sizeof(*tbl));
inter = calloc(sz, sizeof(*inter));
if (inter == NULL)
ALLOC_OBJ(h1, HASH_MAGIC);
if (h1 == NULL)
goto exit;
found = malloc(sz * sizeof(*found));
if (found == NULL)
collision = vbit_new(sz);
h1->k = malloc(l * sizeof(*h1->k));
if (h1->k == NULL)
goto exit;
h1->addend = rnd64();
/*
* XXX the literature sometimes says that the keys should all be
* odd, since repeated multiplication makes the result tend to 0,
* but sometimes doesn't mention it. Not sure if |1 is needed
* here.
*/
for (unsigned i = 0; i < l; i++)
h1->k[i] = rnd64() | 1;
h1->mask = sz - 1;
for (i = 0; i < n; i++) {
for (unsigned i = 0; i < n; i++) {
uint32_t h;
len = strlen(strings[i]);
h = hash(0, strings[i], len, bits, mask);
h = hash(h1, strings[i], strlen(strings[i]));
assert(h < sz);
errno = 0;
......@@ -158,94 +235,110 @@ PH_Generate(char * const * const strings, unsigned n)
goto exit;
buckets[h].idx[buckets[h].n] = i;
buckets[h].n++;
if (len < min)
min = len;
if (len > max)
max = len;
}
qsort(buckets, sz, sizeof(*buckets), len_desc_cmp);
for (i = 0; i < sz && buckets[i].n > 1; i++) {
struct bucket *bucket = &buckets[i];
uint32_t m = 1;
for (unsigned i = 0; i < sz; i++) {
struct bucket *bucket;
struct hash *bhsh;
uint32_t bsz;
AN(bucket->idx);
memset(found, 0, sz * sizeof(*found));
for (int j = 0; j < bucket->n && m < UINT32_MAX; j++) {
uint32_t h;
len = strlen(strings[bucket->idx[j]]);
h = hash(m, strings[bucket->idx[j]], len, bits, mask);
assert(h < sz);
if (tbl[h] != UINT_MAX || found[h] != 0) {
m++;
j = -1;
memset(found, 0, sz * sizeof(*found));
bucket = &buckets[i];
if (bucket->n == 0) {
AZ(vbit_test(collision, i));
assert(tbl[i].idx == UINT_MAX);
continue;
}
found[h] = 1;
AN(bucket->idx);
if (bucket->n == 1) {
AZ(vbit_test(collision, i));
tbl[i].idx = *bucket->idx;
continue;
}
if (m == UINT32_MAX) {
errno = ERANGE;
/* It's infinitesimally improbable, just assert. */
assert(bucket->n <= MAX_BUCKETN);
vbit_set(collision, i);
bsz = getsz(bucket->n * bucket->n);
assert(bsz <= MAXN);
errno = 0;
ALLOC_OBJ(bhsh, HASH_MAGIC);
if (bhsh == NULL)
goto exit;
}
tbl[i].h2 = bhsh;
bhsh->tbl = malloc(bsz * sizeof(*bhsh->tbl));
if (bhsh->tbl == NULL)
goto exit;
bhsh->k = malloc(l * sizeof(*bhsh->k));
if (bhsh->k == NULL)
goto exit;
memset(bhsh->tbl, 0xff, bsz * sizeof(*bhsh->tbl));
bhsh->mask = bsz - 1;
bhsh->addend = rnd64();
/* XXX as above, unsure about |1 here */
for (unsigned j = 0; j < l; j++)
bhsh->k[j] = rnd64() | 1;
len = strlen(strings[bucket->idx[0]]);
inter[hash(0, strings[bucket->idx[0]], len, bits, mask)] = m;
for (int j = 0; j < bucket->n; j++) {
len = strlen(strings[bucket->idx[j]]);
tbl[hash(m, strings[bucket->idx[j]], len, bits, mask)]
= bucket->idx[j];
uint32_t h = hash(bhsh, strings[bucket->idx[j]],
strlen(strings[bucket->idx[j]]));
assert(h < bsz);
if (bhsh->tbl[h] != UINT_MAX) {
j = -1;
memset(bhsh->tbl, 0xff,
bsz * sizeof(*bhsh->tbl));
bhsh->addend = rnd64() | 1;
for (unsigned k = 0; k < l; k++)
bhsh->k[k] = rnd64() | 1;
continue;
}
bhsh->tbl[h] = bucket->idx[j];
}
/* Reuse found[] for a list of free indices in tbl. */
memset(found, 0, sz * sizeof(*found));
for (unsigned j = 0, n = 0; j < sz; j++)
if (tbl[j] == UINT_MAX)
found[n++] = j;
/*
* Continuing with the value of i from the for loop above, now at
* buckets[i].n <= 1
*/
for (int n = 0; i < sz && buckets[i].n > 0; n++, i++) {
struct bucket *bucket = &buckets[i];
len = strlen(strings[bucket->idx[0]]);
tbl[found[n]] = bucket->idx[0];
inter[hash(0, strings[bucket->idx[0]], len, bits, mask)]
= -found[n] - 1;
}
errno = 0;
ALLOC_OBJ(ph, PH_MAGIC);
if (ph == NULL)
goto exit;
ph->inter = inter;
ph->l = l;
ph->h1 = h1;
ph->tbl = tbl;
ph->mask = mask;
ph->bits = bits;
ph->collision = collision;
ph->minlen = min;
ph->maxlen = max;
exit:
AN(buckets);
for (i = 0; i < sz; i++)
for (unsigned i = 0; i < sz; i++)
if (buckets[i].idx != NULL)
free(buckets[i].idx);
free(buckets);
if (found != NULL)
free(found);
if (ph == NULL) {
if (inter != NULL)
free(inter);
if (tbl != NULL)
if (h1 != NULL) {
	/*
	 * This cleanup only runs when ph is NULL, so the first-level hash
	 * must be reached through the local h1; going through ph->h1 here
	 * would dereference a NULL pointer. The first-level hash never
	 * owns a table of its own.
	 */
	AZ(h1->tbl);
	/* free(NULL) is a no-op, so h1->k needs no separate check. */
	free(h1->k);
	FREE_OBJ(h1);
}
if (tbl != NULL) {
if (collision != NULL) {
for (unsigned i = 0; i < sz; i++) {
if (!vbit_test(collision, i))
continue;
CHECK_OBJ_NOTNULL(tbl[i].h2,
HASH_MAGIC);
if (tbl[i].h2->tbl != NULL)
free(tbl[i].h2->tbl);
if (tbl[i].h2->k != NULL)
free(tbl[i].h2->k);
FREE_OBJ(tbl[i].h2);
}
vbit_destroy(collision);
}
free(tbl);
}
}
return (ph);
}
......@@ -256,39 +349,120 @@ PH_Lookup(const struct ph * const restrict ph,
const char * const restrict subject)
{
size_t len;
int32_t h;
uint32_t h;
unsigned idx;
CHECK_OBJ_ORNULL(ph, PH_MAGIC);
AN(strings);
AN(subject);
if (ph == NULL)
return (UINT_MAX);
CHECK_OBJ(ph, PH_MAGIC);
CHECK_OBJ_NOTNULL(ph->h1, HASH_MAGIC);
AN(ph->tbl);
AN(strings);
AN(subject);
len = strlen(subject);
if (len < ph->minlen)
return (UINT_MAX);
if (len > ph->maxlen)
return (UINT_MAX);
h = ph->inter[hash(0, subject, len, ph->bits, ph->mask)];
if (h < 0)
idx = ph->tbl[-h - 1];
else
idx = ph->tbl[hash(h, subject, len, ph->bits, ph->mask)];
assert((len >> 2) <= ph->l);
h = hash(ph->h1, subject, len);
idx = ph->tbl[h].idx;
if (vbit_test(ph->collision, h)) {
struct hash *h2 = ph->tbl[h].h2;
CHECK_OBJ_NOTNULL(h2, HASH_MAGIC);
AN(h2->tbl);
h = hash(h2, subject, len);
idx = h2->tbl[h];
}
if (idx == UINT_MAX || strcmp(subject, strings[idx]) != 0)
return (UINT_MAX);
return (idx);
}
/*
 * Return a finished VSB holding a textual dump of ph as generated for
 * strings: the length bounds, the key count, the parameters of the
 * first-level hash, and then for every first-level slot either the
 * string index stored there, or the parameters and table of the
 * second-level hash attached to it.
 *
 * If ph is NULL, the returned VSB is empty. If ph is non-NULL, strings
 * MUST NOT be NULL. The VSB is allocated here with VSB_new_auto().
 *
 * 64-bit values are printed as unsigned long long, since uint64_t is not
 * unsigned long on every platform, and the pointer is passed to %p as
 * void *.
 */
struct vsb *
PH_Dump(struct ph *ph, char **strings)
{
	struct vsb *sb = VSB_new_auto();

	AN(sb);
	if (ph == NULL) {
		VSB_finish(sb);
		return (sb);
	}
	CHECK_OBJ(ph, PH_MAGIC);
	AN(ph->h1);
	CHECK_OBJ(ph->h1, HASH_MAGIC);
	AN(ph->tbl);
	AN(ph->collision);
	AN(strings);

	VSB_printf(sb, "minlen = %zu\n", ph->minlen);
	VSB_printf(sb, "maxlen = %zu\n", ph->maxlen);
	VSB_printf(sb, "l = %zu\n", ph->l);
	VSB_printf(sb, "h1->mask = 0x%0x\n", ph->h1->mask);
	VSB_printf(sb, "h1->addend = 0x%0llx\n",
		   (unsigned long long)ph->h1->addend);
	for (unsigned i = 0; i < ph->l; i++)
		VSB_printf(sb, "h1->k[%u] = 0x%0llx\n", i,
			   (unsigned long long)ph->h1->k[i]);

	/* The first-level table has mask + 1 slots. */
	for (unsigned i = 0; i <= ph->h1->mask; i++) {
		struct hash *h2;

		VSB_printf(sb, "\n");
		if (!vbit_test(ph->collision, i)) {
			/* No collision: the slot holds a string index. */
			VSB_printf(sb, "tbl[%u].idx = %u\n", i, ph->tbl[i].idx);
			if (ph->tbl[i].idx != UINT_MAX)
				VSB_printf(sb, "\tstrings[%u] = %s\n",
					   ph->tbl[i].idx,
					   strings[ph->tbl[i].idx]);
			continue;
		}

		/* Collision: the slot points to a second-level hash. */
		h2 = ph->tbl[i].h2;
		VSB_printf(sb, "tbl[%u].h2 = %p\n", i, (void *)h2);
		if (h2 == NULL)
			continue;
		CHECK_OBJ(h2, HASH_MAGIC);
		VSB_printf(sb, "tbl[%u].h2->mask = 0x%0x\n", i, h2->mask);
		VSB_printf(sb, "tbl[%u].h2->addend = 0x%0llx\n", i,
			   (unsigned long long)h2->addend);
		for (unsigned j = 0; j < ph->l; j++)
			VSB_printf(sb, "tbl[%u].h2->k[%u] = 0x%0llx\n", i, j,
				   (unsigned long long)h2->k[j]);
		for (unsigned j = 0; j <= h2->mask; j++) {
			VSB_printf(sb, "tbl[%u].h2->tbl[%u] = %u\n", i, j,
				   h2->tbl[j]);
			if (h2->tbl[j] != UINT_MAX)
				VSB_printf(sb, "\tstrings[%u] = %s\n",
					   h2->tbl[j], strings[h2->tbl[j]]);
		}
	}
	VSB_finish(sb);
	return (sb);
}
void
PH_Free(struct ph *ph)
{
if (ph == NULL)
return;
free(ph->inter);
CHECK_OBJ(ph, PH_MAGIC);
if (ph->tbl != NULL) {
if (ph->collision != NULL) {
for (unsigned i = 0; i <= ph->h1->mask; i++) {
if (!vbit_test(ph->collision, i))
continue;
CHECK_OBJ_NOTNULL(ph->tbl[i].h2, HASH_MAGIC);
if (ph->tbl[i].h2->tbl != NULL)
free(ph->tbl[i].h2->tbl);
if (ph->tbl[i].h2->k != NULL)
free(ph->tbl[i].h2->k);
FREE_OBJ(ph->tbl[i].h2);
}
vbit_destroy(ph->collision);
}
free(ph->tbl);
}
if (ph->h1 != NULL) {
AZ(ph->h1->tbl);
free(ph->h1->k);
FREE_OBJ(ph->h1);
}
FREE_OBJ(ph);
}
......@@ -26,18 +26,65 @@
* SUCH DAMAGE.
*/
/* Interface for perfect hashing */
#include <stdint.h>
#include <errno.h>
#include <limits.h>
#include <unistd.h>
#include "vsb.h"
/*
* A perfect hash comprises a struct ph and a table of strings, both of
* which are owned by a VMOD object. Successful lookups return the index
* of a string in the table.
*/
struct ph;
/*
* Initialize perfect hashing. Supplies a seed for random number
* generation, which should be obtained from an entropy source. Only
* needs to be called once.
*/
void PH_Init(uint32_t seed[4]);
/*
* Generate a perfect hash from a table of strings with n elements.
* strings MAY NOT be NULL, and SHALL NOT contain duplicates. n MUST be >
* 0 and <= 2^31.
*
* Returns non-NULL on success, NULL on error, except that PH_Generate()
* will probably not terminate if strings contains duplicates.
*
* On error, errno is set. errno == ERANGE if n is out of range, or may be
* set for other errors (probably ENOMEM for malloc failures).
*/
struct ph * PH_Generate(char * const * const strings, unsigned n);
/*
* Return the index of subject in the table strings, with which ph was
* generated.
*
* ph MUST be generated for strings previously by PH_Generate() (or NULL).
* strings and subject MAY NOT be NULL.
*
* Returns the index of subject in strings, or UINT_MAX if subject is not
* in strings or if ph is NULL.
*/
unsigned PH_Lookup(const struct ph * const restrict ph,
char * const restrict * const restrict strings,
const char * const restrict subject);
/*
* Return a string dump of ph as generated for strings.
*
* Returns an empty buffer if ph is NULL. If ph is non-NULL, strings MAY
* NOT be NULL.
*/
struct vsb * PH_Dump(struct ph *ph, char **strings);
/*
* Free ph. Silently does nothing if ph is NULL.
*/
void PH_Free(struct ph *ph);
/*-
* Copyright (c) 2020 UPLEX Nils Goroll Systemoptimierung
* All rights reserved
*
* Author: Geoffrey Simmons <geoffrey.simmons@uplex.de>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* It isn't cryptography, but the theoretical guarantees of universal
* hashing depend on the randomness of the keys, so we need a good RNG.
*
* This is an implementation of KISS99, from George Marsaglia's post to
* sci.stat.math and sci.math on January 20, 1999. It passes all of the
* tests in TestU01.
*
* It is *not* thread-safe, due to the static state variables.
*
* http://www.ciphersbyritter.com/NEWS4/RANDC.HTM#36A5FC62.17C9CC33@stat.fsu.edu
* https://www.iro.umontreal.ca/~lecuyer/myftp/papers/testu01.pdf
*/
#include <stdint.h>
/*
 * Generator state: two multiply-with-carry words (mwc1, mwc2), a
 * xorshift word (jsr) and a linear congruential word (jcong). These are
 * file-scope statics, so access is not synchronized and each translation
 * unit including this header has its own copy.
 */
static uint32_t mwc1, mwc2, jsr, jcong;

/*
 * Set the generator state from four 32-bit seed words, in the order
 * mwc1, mwc2, jsr, jcong. The seed array is only read.
 *
 * NB: the updates in rnd_nxt() map 0 to 0 for mwc1, mwc2 and jsr, so a
 * zero in seed[0], seed[1] or seed[2] leaves that component stuck at
 * zero and contributing nothing to the output.
 */
static inline void
rnd_init(const uint32_t seed[4])
{
	mwc1 = seed[0];
	mwc2 = seed[1];
	jsr = seed[2];
	jcong = seed[3];
}

/* One multiply-with-carry step with multiplier n on the 32-bit state x. */
#define MWC(n, x) ((n) * ((x) & 65535) + ((x) >> 16))

/*
 * Advance all components of the generator and return the next 32-bit
 * value. All arithmetic is on uint32_t and wraps modulo 2^32.
 */
static inline uint32_t
rnd_nxt(void)
{
	uint32_t mwc;

	/* Two 16-bit multiply-with-carry generators, combined. */
	mwc1 = MWC(36969, mwc1);
	mwc2 = MWC(18000, mwc2);
	mwc = (mwc1 << 16) + mwc2;

	/* 3-shift xorshift register. */
	jsr ^= jsr << 17;
	jsr ^= jsr >> 13;
	jsr ^= jsr << 5;

	/* Linear congruential generator. */
	jcong = 69069 * jcong + 1234567;

	return ((mwc ^ jcong) + jsr);
}
......@@ -106,22 +106,26 @@ usage(const char *argv, int status)
int
main(int argc, char *argv[])
{
FILE *stringsf = stdin, *csv = NULL;
FILE *stringsf = stdin, *csv = NULL, *urandom = NULL;;
char **strings = NULL, **inputs = NULL, *line, *inputf = NULL,
*csvf = NULL;
*csvf = NULL, *dumpf = NULL;
size_t lineln = LINE_MAX;
ssize_t readln;
unsigned n = 0, ninputs = 0;
struct timespec before, after, start, finish;
uint64_t ns = 0, iters, matches;
uint32_t seed[4];
int opt, do_shuf = 0, do_iters = ITERATIONS;
struct rusage rusage;
while ((opt = getopt(argc, argv, "hsc:i:n:")) != -1) {
while ((opt = getopt(argc, argv, "hsc:d:i:n:")) != -1) {
switch (opt) {
case 'c':
csvf = optarg;
break;
case 'd':
dumpf = optarg;
break;
case 'h':
usage(argv[0], EXIT_SUCCESS);
break;
......@@ -242,6 +246,26 @@ main(int argc, char *argv[])
printf("Clock resolution %ld ns\n",
before.tv_sec * BILLION + before.tv_nsec);
printf("\nInitializing perfect hashing ...\n");
errno = 0;
if ((urandom = fopen("/dev/urandom", "r")) == NULL) {
fprintf(stderr, "Cannot open /dev/urandom: %s\n",
strerror(errno));
exit(EXIT_FAILURE);
}
(void)fread(seed, sizeof(uint32_t), 4, urandom);
if (ferror(urandom)) {
fprintf(stderr, "Error reading /dev/urandom: %s\n",
strerror(errno));
exit(EXIT_FAILURE);
}
if (fclose(urandom) != 0) {
fprintf(stderr, "Error closing /dev/urandom: %s\n",
strerror(errno));
exit(EXIT_FAILURE);
}
PH_Init(seed);
printf("\nBuilding perfect hash ...\n");
errno = 0;
(void)clock_gettime(CLOCK, &before);
......@@ -262,6 +286,34 @@ main(int argc, char *argv[])
printf("Generated for %u strings in %.9f s, mean %lu ns/string\n", n,
ns * 1e-9, ns / n);
if (dumpf != NULL) {
FILE *df;
struct vsb *vsb;
printf("\nDumping hash to %s ...\n", dumpf);
vsb = PH_Dump(ph, strings);
CHECK_OBJ_NOTNULL(vsb, VSB_MAGIC);
errno = 0;
if ((df = fopen(dumpf, "w")) == NULL) {
fprintf(stderr, "Cannot open %s: %s\n", dumpf,
strerror(errno));
exit(EXIT_FAILURE);
}
if (fwrite(VSB_data(vsb), 1, VSB_len(vsb), df)
!= (unsigned)VSB_len(vsb)) {
fprintf(stderr, "Error writing to %s: %s\n", dumpf,
strerror(errno));
exit(EXIT_FAILURE);
}
VSB_destroy(&vsb);
if (fclose(df) != 0) {
fprintf(stderr, "Error closing %s: %s\n", dumpf,
strerror(errno));
exit(EXIT_FAILURE);
}
printf("... done.\n");
}
if (do_iters == 0)
exit(EXIT_SUCCESS);
......
......@@ -39,6 +39,7 @@
#include "vcl.h"
#include "vre.h"
#include "vbm.h"
#include "vrnd.h"
#include "cache/cache_director.h"
/*
......@@ -117,6 +118,7 @@ vmod_event(VRT_CTX, struct vmod_priv *priv, enum vcl_event_e e)
{
struct vsc_head *vsc_head;
struct vsc_entry *vsc_entry;
uint32_t seed[4];
ASSERT_CLI();
CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
......@@ -132,6 +134,10 @@ vmod_event(VRT_CTX, struct vmod_priv *priv, enum vcl_event_e e)
vsc_head = priv->priv;
switch(e) {
case VCL_EVENT_LOAD:
AZ(VRND_RandomCrypto(seed, sizeof(seed)));
PH_Init(seed);
break;
case VCL_EVENT_DISCARD:
while (!VSLIST_EMPTY(vsc_head)) {
vsc_entry = VSLIST_FIRST(vsc_head);
......@@ -155,7 +161,7 @@ vmod_event(VRT_CTX, struct vmod_priv *priv, enum vcl_event_e e)
}
break;
default:
assert(e == VCL_EVENT_LOAD);
WRONG("Illegal event type");
}
return 0;
}
......@@ -405,8 +411,8 @@ vmod_set_compile(VRT_CTX, struct VPFX(selector_set) *set)
errno = 0;
if ((set->hash = PH_Generate(members, set->nmembers)) == NULL) {
if (errno == ERANGE)
VFAIL(ctx, "%s.compile(): perfect hash cannot be "
"generated for this set", set->vcl_name);
VFAIL(ctx, "%s.compile(): too many strings in the set",
set->vcl_name);
else
VFAIL(ctx, "%s.compile() failed: %s", set->vcl_name,
strerror(errno));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment