Commit b956c988 authored by Geoff Simmons's avatar Geoff Simmons

QP_Insert() requires that strings are added in sorted order.

The VMOD does this during .compile(), and QP_Insert() is no longer
called during .add(). The .compile() call is now required in all
cases, and it must be called before .create_stats().

This is because QP_Insert() was not correctly rotating the trie
when a set has overlapping prefixes, and a shorter prefix was
added before the longer one. With sorted order, shorter prefixes
are always added first, so rotation is unnecessary.
parent b0b250a2
......@@ -223,6 +223,14 @@ QP_Insert(struct qp_y * * restrict root, unsigned idx,
y = y->branch[n];
continue;
}
}
assert(s[i] != *c);
AN(*c);
if (i == y->len && y->branch != NULL) {
AN(y->bitmap);
bitmap = getbits(y, *c);
y_new = y_leaf_alloc(idx, c, b);
if (y_new == NULL)
return (-1);
......@@ -237,8 +245,6 @@ QP_Insert(struct qp_y * * restrict root, unsigned idx,
return (0);
}
assert(s[i] != *c);
if (y->branch == NULL) {
/*
* Current node is a leaf.
......@@ -423,7 +429,7 @@ QP_Prefixes(const struct qp_y * const restrict root,
for (const struct qp_y *y = root;;) {
size_t l;
uint16_t bitmap;
int idx = -1, branches;
int idx = -1;
CHECK_OBJ(y, QP_Y_MAGIC);
l = y->off + y->len;
......@@ -445,30 +451,6 @@ QP_Prefixes(const struct qp_y * const restrict root,
ANIB(idx);
}
/*
* Before we advance to the next branch, check if the
* current prefix matches any of the other branches from
* this node (this is the overlapping prefix case).
*/
branches = popcount(y->bitmap);
for (int i = 0; i < branches; i++) {
const struct qp_y *yy;
if (i == idx)
continue;
yy = y->branch[i];
CHECK_OBJ_NOTNULL(yy, QP_Y_MAGIC);
if (yy->off + yy-> len != l)
continue;
if (yy->term) {
if (strncmp(subject, strings[yy->idx], l) != 0)
return (0);
assert(l != len);
if (update_match(match, yy->idx, len, l) != 0)
return (-1);
}
}
if (idx == -1)
return (0);
y = y->branch[idx];
......
# looks like -*- vcl -*-
varnishtest "shuffle the first 219 lines from /usr/dict/words starting with Al"
# This has turned out to be a critical test case for QP_Prefixes().
varnish v1 -vcl {
import ${vmod_selector};
import std;
backend b { .host = "${bad_ip}"; }
sub vcl_init {
new s = selector.set();
s.add("Alaska's");
s.add("Aldrin's");
s.add("Albania's");
s.add("Alexandria's");
s.add("Alcyone");
s.add("Alma's");
s.add("Ali");
s.add("Algenib");
s.add("Allyson's");
s.add("Aladdin");
s.add("Allegheny's");
s.add("Aldo");
s.add("Alleghenies's");
s.add("Algol");
s.add("Alford's");
s.add("Alexei's");
s.add("Alaskans");
s.add("Alabaman's");
s.add("Alamo's");
s.add("Algeria's");
s.add("Alejandra's");
s.add("Alabama's");
s.add("Alkaid's");
s.add("Alcestis");
s.add("Aldo's");
s.add("Ali's");
s.add("Alejandra");
s.add("Alar's");
s.add("Aleut");
s.add("Algiers");
s.add("Allyson");
s.add("Algieba's");
s.add("Al's");
s.add("Alighieri's");
s.add("Alex's");
s.add("Alamogordo");
s.add("Alexander");
s.add("Alcuin");
s.add("Alnilam's");
s.add("Allen's");
s.add("Alfredo's");
s.add("Almohad");
s.add("Aldrin");
s.add("Alembert");
s.add("Alderamin's");
s.add("Aline");
s.add("Alcatraz");
s.add("Alabama");
s.add("Alexandra");
s.add("Algeria");
s.add("Allstate's");
s.add("Allegra");
s.add("Almach's");
s.add("Albuquerque");
s.add("Alonzo's");
s.add("Albert");
s.add("Alaska");
s.add("Alisa's");
s.add("Alfonzo's");
s.add("Alcatraz's");
s.add("Allende");
s.add("Albany's");
s.add("Albert's");
s.add("Alcmena's");
s.add("Alejandro's");
s.add("Allegheny");
s.add("Alberio");
s.add("Alaric");
s.add("Algonquians");
s.add("Almach");
s.add("Alberto");
s.add("Albireo");
s.add("Albany");
s.add("Aladdin's");
s.add("Almoravid");
s.add("Alger's");
s.add("Alcmena");
s.add("Alexis");
s.add("Alabaman");
s.add("Albee's");
s.add("Alison");
s.add("Alamo");
s.add("Alisha");
s.add("Alec's");
s.add("Algonquin's");
s.add("Aleichem");
s.add("Alice's");
s.add("Alissa's");
s.add("Allahabad");
s.add("Alfred's");
s.add("Alfred");
s.add("Alcibiades's");
s.add("Aldan");
s.add("Alberta");
s.add("Albanian's");
s.add("Alpert");
s.add("Alden");
s.add("Algonquin");
s.add("Allie's");
s.add("Alejandro");
s.add("Alan's");
s.add("Alcott");
s.add("Algol's");
s.add("Aline's");
s.add("Algerian's");
s.add("Algerian");
s.add("Alisa");
s.add("Albanians");
s.add("Alcoa");
s.add("Alfredo");
s.add("Alcestis's");
s.add("Alan");
s.add("Alger");
s.add("Albee");
s.add("Alberio's");
s.add("Aleutian");
s.add("Alexandra's");
s.add("Allan");
s.add("Alistair's");
s.add("Alfonso's");
s.add("Alisha's");
s.add("Alistair");
s.add("Alonzo");
s.add("Allison's");
s.add("Almoravid's");
s.add("Al");
s.add("Almaty");
s.add("Aleppo");
s.add("Alhambra's");
s.add("Alfreda's");
s.add("Aleichem's");
s.add("Almohad's");
s.add("Alexander's");
s.add("Alison's");
s.add("Alderamin");
s.add("Alnilam");
s.add("Allan's");
s.add("Allison");
s.add("Alabamian");
s.add("Allentown");
s.add("Algerians");
s.add("Alexis's");
s.add("Alleghenies");
s.add("Alicia's");
s.add("Alabamians");
s.add("Algiers's");
s.add("Alec");
s.add("Albireo's");
s.add("Albuquerque's");
s.add("Algonquian's");
s.add("Algenib's");
s.add("Allen");
s.add("Allegra's");
s.add("Alcott's");
s.add("Alba");
s.add("Alexei");
s.add("Alba's");
s.add("Alabamian's");
s.add("Alioth");
s.add("Alighieri");
s.add("Almighty");
s.add("Alnitak");
s.add("Alcuin's");
s.add("Algonquian");
s.add("Alhena");
s.add("Alaskan's");
s.add("Almighty's");
s.add("Alcyone's");
s.add("Albigensian's");
s.add("Aldebaran's");
s.add("Alar");
s.add("Aldan's");
s.add("Allie");
s.add("Alioth's");
s.add("Alfreda");
s.add("Alexandria");
s.add("Aleppo's");
s.add("Allahabad's");
s.add("Allende's");
s.add("Alex");
s.add("Alfonzo");
s.add("Allah's");
s.add("Alhena's");
s.add("Alberto's");
s.add("Allentown's");
s.add("Alamogordo's");
s.add("Alnitak's");
s.add("Allah");
s.add("Alcoa's");
s.add("Albanian");
s.add("Alden's");
s.add("Aleut's");
s.add("Alabamans");
s.add("Algieba");
s.add("Alaric's");
s.add("Alembert's");
s.add("Alma");
s.add("Alcindor's");
s.add("Almaty's");
s.add("Alcibiades");
s.add("Alford");
s.add("Albigensian");
s.add("Alana's");
s.add("Albania");
s.add("Alfonso");
s.add("Alhambra");
s.add("Aldebaran");
s.add("Allstate");
s.add("Alissa");
s.add("Aleutian's");
s.add("Alberta's");
s.add("Alicia");
s.add("Albion's");
s.add("Albion");
s.add("Alaskan");
s.add("Alana");
s.add("Alcindor");
s.add("Alkaid");
s.add("Alice");
s.compile();
}
sub vcl_recv {
return (synth(200));
}
sub vcl_synth {
std.timestamp("BeforeMatch");
set resp.http.Match = s.hasprefix(req.http.Word);
std.timestamp("AfterMatch");
set resp.http.N = s.nmatches();
return (deliver);
}
} -start
client c1 {
txreq -hdr "Word: Ali"
rxresp
expect resp.status == 200
expect resp.http.N == 2
} -run
......@@ -142,6 +142,7 @@ varnish v1 -vcl {
s.add("foobarbaz", backend=b2);
s.add("foobar", backend=b3);
s.add("foo", backend=b4);
s.compile();
}
sub vcl_recv {
......
......@@ -103,6 +103,12 @@ usage(const char *argv, int status)
exit(status);
}
static int
cmp(const void *a, const void *b)
{
return( strcmp(* (char * const *)a, * (char * const *)b) );
}
int
main(int argc, char *argv[])
{
......@@ -254,7 +260,13 @@ main(int argc, char *argv[])
printf("Clock resolution %ld ns\n",
before.tv_sec * BILLION + before.tv_nsec);
printf("Building trie ...\n");
printf("\nSorting %u strings ...\n", n);
(void)clock_gettime(CLOCK, &before);
qsort(strings, n, sizeof(*strings), cmp);
(void)clock_gettime(CLOCK, &after);
printf("... sorted in %.9f s\n", tdiff(&before, &after) * 1e-9);
printf("\nBuilding trie ...\n");
for (unsigned i = 0; i < n; i++) {
int ret;
......
......@@ -791,6 +791,7 @@ varnish v1 -vcl {
s.add("FOOBARBAZ");
s.add("FOOBAR");
s.add("FOO");
s.compile();
}
sub vcl_recv {
......@@ -971,6 +972,7 @@ varnish v1 -vcl {
s.add("leTTeR"); # 5
s.add("LETtEreD"); # 6
s.add("letTERING"); # 7
s.compile();
}
sub vcl_recv {
......
......@@ -134,6 +134,7 @@ varnish v1 -vcl {
s.add("foobarbaz");
s.add("foobar");
s.add("foo");
s.compile();
}
sub vcl_recv {
......
......@@ -127,6 +127,7 @@ varnish v1 -vcl {
s.add("baz", integer=2);
s.add("quux", integer=-1);
s.add("foobar", integer=-2);
s.compile();
}
sub vcl_recv {
......@@ -188,6 +189,7 @@ varnish v1 -vcl {
s.add("foobarbaz", integer=2);
s.add("foobar", integer=3);
s.add("foo", integer=4);
s.compile();
}
sub vcl_recv {
......@@ -233,6 +235,7 @@ varnish v1 -vcl {
s.add("foobarbaz", integer=2);
s.add("foobar", integer=3);
s.add("foo", integer=4);
s.compile();
}
sub vcl_recv {
......@@ -292,6 +295,7 @@ varnish v1 -vcl {
s.add("foobarbaz", integer=2);
s.add("foobar", integer=3);
s.add("foo", integer=4);
s.compile();
}
sub vcl_recv {
......
......@@ -814,7 +814,7 @@ client c1 {
expect resp.http.Match == "false"
} -run
varnish v1 -errvcl {vmod selector failure: t.add(): "foo" added more than once} {
varnish v1 -errvcl {vmod selector failure: t.compile(): "foo" added more than once} {
import ${vmod_selector};
backend b { .host = "${bad_ip}"; }
......@@ -822,6 +822,7 @@ varnish v1 -errvcl {vmod selector failure: t.add(): "foo" added more than once}
new t = selector.set();
t.add("foo");
t.add("foo");
t.compile();
}
}
......
......@@ -13,6 +13,7 @@ varnish v1 -vcl {
s.add("bar");
s.add("baz");
s.add("quux");
s.compile();
}
sub vcl_recv {
......@@ -146,6 +147,7 @@ varnish v1 -vcl {
s.add("foobarbaz");
s.add("foobar");
s.add("foo");
s.compile();
}
sub vcl_recv {
......@@ -323,6 +325,7 @@ varnish v1 -vcl {
s.add("foobar");
s.add("foobarbaz");
s.add("foobarbazquux");
s.compile();
}
sub vcl_recv {
......@@ -577,6 +580,7 @@ varnish v1 -vcl {
s.add("plaintiffs"); # 75
s.add("plainer"); # 76
s.add("plainest"); # 77
s.compile();
}
sub vcl_recv {
......@@ -1454,8 +1458,13 @@ varnish v1 -vcl {
sub vcl_init {
new t = selector.set();
t.add("foo");
t.compile();
new n = selector.set();
n.compile();
new c = selector.set();
c.add("bar");
}
sub vcl_recv {
......@@ -1465,6 +1474,7 @@ varnish v1 -vcl {
sub vcl_synth {
set resp.http.Undef = t.hasprefix(req.http.No-Such-Header);
set resp.http.Nil = n.hasprefix("foo");
set resp.http.Not-Compiled = c.hasprefix("bar");
return (deliver);
}
}
......@@ -1473,6 +1483,7 @@ logexpect l1 -v v1 -d 0 -g vxid -q "VCL_Error" {
expect 0 * Begin req
expect * = VCL_Error {^vmod selector error: t\.hasprefix\(\): subject string is NULL$}
expect * = VCL_Error {^vmod selector error: n\.hasprefix\(\): no entries were added$}
expect * = VCL_Error {^vmod selector error: c\.hasprefix\(\): set was not compiled$}
expect * = End
} -start
......@@ -1482,6 +1493,7 @@ client c1 {
expect resp.status == 200
expect resp.http.Undef == "false"
expect resp.http.Nil == "false"
expect resp.http.Not-Compiled == "false"
} -run
logexpect l1 -wait
......@@ -1494,6 +1506,7 @@ varnish v1 -vcl {
sub vcl_init {
new s = selector.set();
s.add("foo");
s.compile();
}
sub vcl_recv {
......
......@@ -219,6 +219,7 @@ varnish v1 -vcl {
s.add("foobarbaz", regex="bar");
s.add("foobar", regex="baz");
s.add("foo", regex="quux");
s.compile();
}
sub vcl_recv {
......
......@@ -12,6 +12,7 @@ varnish v1 -vcl {
s.add("bar");
s.add("baz");
s.add("quux");
s.compile();
s.create_stats();
}
} -start
......@@ -37,6 +38,7 @@ varnish v1 -vcl {
p.add("foobar");
p.add("foobarbaz");
p.add("foobarbazquux");
p.compile();
p.create_stats();
# No .create_stats() call.
......@@ -49,6 +51,7 @@ varnish v1 -vcl {
# Calling .create_stats() on an empty set is
# pointless, but not an error.
new e = selector.set();
e.compile();
e.create_stats();
}
}
......@@ -197,6 +200,7 @@ varnish v1 -vcl {
words.add("electrode's");
words.add("disposition");
words.add("Rena's");
words.compile();
words.create_stats();
}
}
......@@ -251,3 +255,15 @@ logexpect l1 -v v1 -d 1 -g vxid -q "VCL_Error" {
expect * = VCL_Error {^vmod selector failure: n\.create_stats\(\) may only be called in vcl_init$}
expect * = End
} -run
varnish v1 -errvcl {vmod selector failure: s.create_stats(): set was not compiled} {
import ${vmod_selector};
backend b { .host = "${bad_ip}"; }
sub vcl_init {
new s = selector.set();
s.add("foo");
s.create_stats();
s.compile();
}
}
......@@ -134,6 +134,7 @@ varnish v1 -vcl {
s.add("foobarbaz", string="2");
s.add("foobar", string="3");
s.add("foo", string="4");
s.compile();
}
sub vcl_recv {
......
......@@ -14,6 +14,7 @@ varnish v1 -vcl {
s.add("baz", regex="$");
s.add("quux", regex=".*");
s.add("foobar", regex=":.*");
s.compile();
}
sub vcl_recv {
......@@ -95,6 +96,7 @@ varnish v1 -vcl {
s.add("6", regex="ping");
s.add("7", regex="(?<=[&\?])(foo|bar)=[^&]+(?:&|$)");
s.add("8", regex="\??(p|pi)=.*?(&|$)");
s.compile();
}
sub vcl_recv {
......@@ -137,6 +139,7 @@ varnish v1 -vcl {
sub vcl_init {
new s = selector.set();
s.add("foo", regex="bar");
s.compile();
}
sub vcl_recv {
......@@ -181,12 +184,14 @@ varnish v1 -vcl {
rewrite1.add("/alpha/beta", regex="(\?.*)\bfoo=[^&]+&?(.*)$");
rewrite1.add("/delta/gamma", regex="(\?.*)\bbar=[^&]+&?(.*)$");
rewrite1.add("/epsilon/zeta", regex="(\?.*)\bbaz=[^&]+&?(.*)$");
rewrite1.compile();
new rewrite2 = selector.set();
rewrite2.add("/foo/", regex="^(/foo)/([^/]+)/([^/]+)/");
rewrite2.add("/foo/bar/", regex="^(/foo/bar)/([^/]+)/([^/]+)/");
rewrite2.add("/foo/bar/baz/",
regex="^(/foo/bar/baz)/([^/]+)/([^/]+)/");
rewrite2.compile();
}
sub vcl_recv {
......
......@@ -259,7 +259,6 @@ vmod_set_add(VRT_CTX, struct vmod_selector_set *set,
vre_t *re = NULL;
const char *error;
int erroffset;
char **members;
CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
CHECK_OBJ_NOTNULL(set, VMOD_SELECTOR_SET_MAGIC);
......@@ -284,7 +283,6 @@ vmod_set_add(VRT_CTX, struct vmod_selector_set *set,
AN(set->members);
set->members[n - 1] = strdup(args->arg1);
AN(set->members[n - 1]);
members = set->members;
if (!set->case_sensitive) {
set->lomembers = realloc(set->lomembers,
......@@ -294,18 +292,6 @@ vmod_set_add(VRT_CTX, struct vmod_selector_set *set,
AN(set->lomembers[n - 1]);
for (char *m = set->lomembers[n-1]; *m; m++)
*m = tolower(*m);
members = set->lomembers;
}
errno = 0;
if (QP_Insert(&set->origo, n - 1, members) != 0) {
if (errno == EINVAL)
VFAIL(ctx, "%s.add(): \"%s\" added more than once",
set->vcl_name, args->arg1);
else
VFAIL(ctx, "%s.add(\"%s\") failed: %s", set->vcl_name,
args->arg1, strerror(errno));
return;
}
if (args->valid_regex) {
......@@ -347,12 +333,27 @@ vmod_set_add(VRT_CTX, struct vmod_selector_set *set,
set->table[n - 1] = entry;
}
struct memberidx {
char *member;
unsigned n;
};
static int
cmp(const void *a, const void *b)
{
const struct memberidx *aa = a, *bb = b;
return( strcmp(aa->member, bb->member) );
}
VCL_VOID
vmod_set_compile(VRT_CTX, struct VPFX(selector_set) *set)
{
char **members;
unsigned sz;
struct memberidx *idx;
CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
CHECK_OBJ_NOTNULL(ctx->ws, WS_MAGIC);
CHECK_OBJ_NOTNULL(set, VMOD_SELECTOR_SET_MAGIC);
if ((ctx->method & VCL_MET_INIT) == 0) {
......@@ -373,6 +374,34 @@ vmod_set_compile(VRT_CTX, struct VPFX(selector_set) *set)
return;
}
sz = WS_ReserveSize(ctx->ws, sizeof(*idx) * set->nmembers);
if (sz == 0) {
VFAIL(ctx, "%s.compile(): insufficient workspace",
set->vcl_name);
return;
}
idx = (struct memberidx *)WS_Front(ctx->ws);
for (unsigned i = 0; i < set->nmembers; i++) {
idx[i].n = i;
idx[i].member = members[i];
}
qsort(idx, set->nmembers, sizeof(*idx), cmp);
for (unsigned i = 0; i < set->nmembers; i++) {
errno = 0;
if (QP_Insert(&set->origo, idx[i].n, members) != 0) {
if (errno == EINVAL)
VFAIL(ctx, "%s.compile(): \"%s\" added more "
"than once", set->vcl_name, members[i]);
else
VFAIL(ctx, "%s.compile(\"%s\") failed: %s",
set->vcl_name, members[i],
strerror(errno));
WS_Release(ctx->ws, sz);
return;
}
}
WS_Release(ctx->ws, sz);
errno = 0;
if ((set->hash = PH_Generate(members, set->nmembers)) == NULL) {
if (errno == ERANGE)
......@@ -484,8 +513,11 @@ vmod_set_hasprefix(VRT_CTX, struct vmod_selector_set *set, VCL_STRING subject)
set->vcl_name);
return (0);
}
AN(set->origo);
if (set->origo == NULL) {
VERR(ctx, "%s.hasprefix(): set was not compiled",
set->vcl_name);
return (0);
}
if (subject == NULL) {
VERR(ctx, "%s.hasprefix(): subject string is NULL",
set->vcl_name);
......@@ -845,6 +877,11 @@ vmod_set_create_stats(VRT_CTX, struct vmod_selector_set *set,
if (set->nmembers == 0)
memset(&stats, 0, sizeof(stats));
else if (set->origo == NULL) {
VFAIL(ctx, "%s.create_stats(): set was not compiled",
set->vcl_name);
return;
}
else {
char **members = set->members;
if (!set->case_sensitive)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment