Commit 729b8c44 authored by Geoff Simmons

Remove the byte-by-byte compares in match and prefix searches.

Vector extensions are common in hardware now, as are C libraries that
use vector instructions to implement functions like memcmp(). So
we hand off comparisons to the library to take advantage of them.

For the same reason, we can afford to call strlen() on the subject
string to locate the terminating null, rather than scan for it.

Also, the match function descends through the trie to find a
potential match, and does the comparison only then, as is common
for trie/critbit/patricia implementations.
parent f95af2f6
......@@ -236,34 +236,25 @@ PT_Lookup(const struct pt_y * const restrict root,
char * const restrict * const restrict strings,
const char * const restrict subject)
{
const struct pt_y *y = root;
const unsigned char *c = (unsigned char *) subject;
size_t len;
AN(strings);
AN(subject);
len = strlen(subject);
while (y != NULL) {
const unsigned char *s;
for (const struct pt_y *y = root; y != NULL;) {
unsigned char b;
unsigned short i;
CHECK_OBJ_NOTNULL(y, PT_Y_MAGIC);
s = (unsigned char *)strings[y->idx] + y->off;
AN(s);
for (i = 0; *c != '\0' && i < y->len && s[i] == *c; i++)
c++;
size_t l;
if (s[i] == '\0' && *c == '\0')
CHECK_OBJ(y, PT_Y_MAGIC);
l = y->off + y->len;
if (l > len)
return UINT_MAX;
if (l == len && memcmp(subject, strings[y->idx], len) == 0)
return y->idx;
if (i == y->len) {
b = (y->bitmask & *c) != 0;
assert(b < 2);
y = y->leaf[b];
continue;
}
return (UINT_MAX);
b = (y->bitmask & subject[l]) != 0;
assert(b < 2);
y = y->leaf[b];
}
return (UINT_MAX);
......@@ -272,21 +263,22 @@ PT_Lookup(const struct pt_y * const restrict root,
static int
pt_search(const struct pt_y * const restrict y,
char * const restrict * const restrict strings,
const unsigned char * restrict c,
const unsigned char * restrict subject, size_t len,
struct match_data * const restrict match)
{
const unsigned char * restrict s;
unsigned short i;
size_t l;
if (y == NULL)
return (0);
CHECK_OBJ(y, PT_Y_MAGIC);
s = (unsigned char *)strings[y->idx] + y->off;
for (i = 0; *c != '\0' && i < y->len && s[i] == *c; i++)
c++;
if (s[i] == '\0') {
l = y->off + y->len;
if (l > len)
return (0);
if (memcmp(subject + y->off, strings[y->idx] + y->off, y->len) != 0)
return (0);
if (strings[y->idx][l] == '\0') {
if (match->n == match->limit)
return (-1);
match->indices[match->n] = y->idx;
......@@ -295,18 +287,15 @@ pt_search(const struct pt_y * const restrict y,
match->min = y->idx;
if (y->idx > match->max)
match->max = y->idx;
if (*c == '\0') {
if (l == len) {
match->exact = y->idx;
return (0);
}
}
if (i < y->len)
return (0);
if (pt_search(y->leaf[0], strings, c, match) != 0)
if (pt_search(y->leaf[0], strings, subject, len, match) != 0)
return (-1);
if (pt_search(y->leaf[1], strings, c, match) != 0)
if (pt_search(y->leaf[1], strings, subject, len, match) != 0)
return (-1);
return (0);
......@@ -318,6 +307,8 @@ PT_Prefixes(const struct pt_y * const restrict root,
const char * const restrict subject,
struct match_data * const restrict match)
{
size_t len;
CHECK_OBJ_NOTNULL(match, MATCH_DATA_MAGIC);
AN(match->indices);
AN(match->limit);
......@@ -329,7 +320,8 @@ PT_Prefixes(const struct pt_y * const restrict root,
match->max = 0;
match->exact = UINT_MAX;
return (pt_search(root, strings, (unsigned char *)subject, match));
len = strlen(subject);
return (pt_search(root, strings, (unsigned char *)subject, len, match));
}
void
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment