Commit ac489fa1 authored by Geoff Simmons

Hash checks strings against min and max length for the set.

strlen() is also cheap if it has a SIMD implementation, so we can
afford this optimization to reject some strings immediately.
parent 06f4b2cb
......@@ -49,6 +49,8 @@ struct ph {
#define PH_MAGIC 0x00cd8c1d
int32_t *inter;
unsigned *tbl;
size_t minlen;
size_t maxlen;
int32_t mask;
};
......@@ -108,6 +110,7 @@ len_desc_cmp(const void *b1, const void *b2)
struct ph *
PH_Generate(char * const * const strings, unsigned n)
{
size_t min = SIZE_MAX, max = 0;
int *found = NULL;
struct bucket *buckets = NULL;
int32_t *inter = NULL;
......@@ -138,6 +141,7 @@ PH_Generate(char * const * const strings, unsigned n)
goto exit;
for (i = 0; i < n; i++) {
size_t len = strlen(strings[i]);
uint32_t h = hash(FNV32_OFFSET_BASIS, strings[i]) & mask;
assert(h < sz);
......@@ -149,6 +153,11 @@ PH_Generate(char * const * const strings, unsigned n)
goto exit;
buckets[h].idx[buckets[h].n] = i;
buckets[h].n++;
if (len < min)
min = len;
if (len > max)
max = len;
}
qsort(buckets, sz, sizeof(*buckets), len_desc_cmp);
......@@ -209,6 +218,8 @@ PH_Generate(char * const * const strings, unsigned n)
ph->inter = inter;
ph->tbl = tbl;
ph->mask = mask;
ph->minlen = min;
ph->maxlen = max;
exit:
AN(buckets);
......@@ -233,6 +244,7 @@ PH_Lookup(const struct ph * const restrict ph,
char * const restrict * const restrict strings,
const char * const restrict subject)
{
size_t len;
int32_t h;
unsigned idx;
......@@ -243,6 +255,12 @@ PH_Lookup(const struct ph * const restrict ph,
if (ph == NULL)
return (UINT_MAX);
len = strlen(subject);
if (len < ph->minlen)
return (UINT_MAX);
if (len > ph->maxlen)
return (UINT_MAX);
h = ph->inter[hash(FNV32_OFFSET_BASIS, subject) & ph->mask];
if (h < 0)
idx = ph->tbl[-h - 1];
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment