Commit 39bef809 authored by Geoff Simmons

The hash function now takes the string length and counts it down, instead of scanning for the terminating NUL.

May be advantageous for loop/branch prediction.
parent 105a1327
......@@ -65,13 +65,13 @@ struct bucket {
/* FNV-1a 64-bit with xor-folding */
static inline uint32_t
hash(uint32_t h, const char *s, unsigned bits, unsigned mask)
hash(uint32_t h, const char *s, size_t len, unsigned bits, unsigned mask)
{
uint64_t h64 = h;
if (h64 == 0)
h64 = FNV64_OFFSET_BASIS;
while (*s) {
while (len--) {
h64 ^= (uint64_t)*s++;
h64 *= FNV64_PRIME;
......@@ -110,7 +110,7 @@ len_desc_cmp(const void *b1, const void *b2)
struct ph *
PH_Generate(char * const * const strings, unsigned n)
{
size_t min = SIZE_MAX, max = 0;
size_t min = SIZE_MAX, max = 0, len;
int *found = NULL;
struct bucket *buckets = NULL;
int32_t *inter = NULL;
......@@ -144,8 +144,10 @@ PH_Generate(char * const * const strings, unsigned n)
goto exit;
for (i = 0; i < n; i++) {
size_t len = strlen(strings[i]);
uint32_t h = hash(0, strings[i], bits, mask);
uint32_t h;
len = strlen(strings[i]);
h = hash(0, strings[i], len, bits, mask);
assert(h < sz);
errno = 0;
......@@ -173,8 +175,10 @@ PH_Generate(char * const * const strings, unsigned n)
memset(found, 0, sz * sizeof(*found));
for (int j = 0; j < bucket->n && m < UINT32_MAX; j++) {
uint32_t h = hash(m, strings[bucket->idx[j]], bits,
mask);
uint32_t h;
len = strlen(strings[bucket->idx[j]]);
h = hash(m, strings[bucket->idx[j]], len, bits, mask);
assert(h < sz);
if (tbl[h] != UINT_MAX || found[h] != 0) {
m++;
......@@ -189,10 +193,13 @@ PH_Generate(char * const * const strings, unsigned n)
goto exit;
}
inter[hash(0, strings[bucket->idx[0]], bits, mask)] = m;
for (int j = 0; j < bucket->n; j++)
tbl[hash(m, strings[bucket->idx[j]], bits, mask)]
len = strlen(strings[bucket->idx[0]]);
inter[hash(0, strings[bucket->idx[0]], len, bits, mask)] = m;
for (int j = 0; j < bucket->n; j++) {
len = strlen(strings[bucket->idx[j]]);
tbl[hash(m, strings[bucket->idx[j]], len, bits, mask)]
= bucket->idx[j];
}
}
/* Reuse found[] for a list of free indices in tbl. */
......@@ -208,8 +215,9 @@ PH_Generate(char * const * const strings, unsigned n)
for (int n = 0; i < sz && buckets[i].n > 0; n++, i++) {
struct bucket *bucket = &buckets[i];
len = strlen(strings[bucket->idx[0]]);
tbl[found[n]] = bucket->idx[0];
inter[hash(0, strings[bucket->idx[0]], bits, mask)]
inter[hash(0, strings[bucket->idx[0]], len, bits, mask)]
= -found[n] - 1;
}
......@@ -264,11 +272,11 @@ PH_Lookup(const struct ph * const restrict ph,
if (len > ph->maxlen)
return (UINT_MAX);
h = ph->inter[hash(0, subject, ph->bits, ph->mask)];
h = ph->inter[hash(0, subject, len, ph->bits, ph->mask)];
if (h < 0)
idx = ph->tbl[-h - 1];
else
idx = ph->tbl[hash(h, subject, ph->bits, ph->mask)];
idx = ph->tbl[hash(h, subject, len, ph->bits, ph->mask)];
if (idx == UINT_MAX || strcmp(subject, strings[idx]) != 0)
return (UINT_MAX);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment