Commit 151a9b17 authored by Geoff Simmons

Add the cost() function.

parent 5760ef8d
......@@ -53,6 +53,7 @@ SYNOPSIS
[, <regex options>])
STRING re2.extract(STRING pattern, STRING text, STRING rewrite
[, <regex options>])
INT re2.cost(STRING pattern [, <regex options>])
# set object interface
new OBJECT = re2.set([ENUM anchor] [, <regex options>])
......@@ -893,6 +894,41 @@ Example::
"\1:\2");
.. _func_cost:
cost(...)
---------
::
INT cost(
STRING pattern,
BOOL utf8=0,
BOOL posix_syntax=0,
BOOL longest_match=0,
INT max_mem=8388608,
BOOL literal=0,
BOOL never_nl=0,
BOOL dot_nl=0,
BOOL never_capture=0,
BOOL case_sensitive=1,
BOOL perl_classes=0,
BOOL word_boundary=0,
BOOL one_line=0
)
Like the ``.cost()`` method above, return a numeric measurement > 0
from the RE2 library for ``pattern`` with the given options. More
complex regexen have a higher cost than less complex regexen.
Fails and returns -1 if ``pattern`` cannot be compiled.
Example::
std.log("simple cost=" + re2.cost("simple")
+ " complex cost=" + re2.cost("complex{1,128}"));
.. _obj_set:
set(...)
......
......@@ -21,6 +21,12 @@ varnish v1 -vcl {
set resp.http.C-Simple = simple.cost();
set resp.http.C-Medium = medium.cost();
set resp.http.C-Complex = complex.cost();
set resp.http.C-Simple-F = re2.cost("simple regexp");
set resp.http.C-Medium-F = re2.cost("medium.*regexp");
set resp.http.C-Complex-F = re2.cost("complex.{1,128}regexp");
set resp.http.Fail-F = re2.cost("(");
}
} -start
......@@ -34,4 +40,14 @@ client c1 {
expect resp.http.C-Simple > 0
expect resp.http.C-Medium > resp.http.C-Simple
expect resp.http.C-Complex > resp.http.C-Medium
expect resp.http.C-Simple-F == resp.http.C-Simple
expect resp.http.C-Medium-F == resp.http.C-Medium
expect resp.http.C-Complex-F == resp.http.C-Complex
expect resp.http.Fail-F == "-1"
} -run
# Check the log for the VCL_Error emitted by the function form re2.cost()
# when its pattern, here "(", cannot be compiled.
logexpect l1 -v v1 -d 1 -g vxid -q "VCL_Error" {
expect 0 * Begin req
expect * = VCL_Error {^vmod re2 error: re2\.cost\("\("\): Cannot compile: }
expect * = End
} -run
......@@ -468,23 +468,29 @@ vmod_regex_extract(VRT_CTX, struct vmod_re2_regex *re, VCL_STRING text,
return rewrite_method(ctx, EXTRACT, re, text, rewrite, fallback);
}
VCL_INT
vmod_regex_cost(VRT_CTX, struct vmod_re2_regex *re)
static VCL_INT
cost(VRT_CTX, vre2 * const restrict vre2, const char * const restrict context)
{
int cost;
const char *err;
CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
CHECK_OBJ_NOTNULL(re, VMOD_RE2_REGEX_MAGIC);
if ((err = vre2_cost(re->vre2, &cost)) != NULL) {
VERR(ctx, "%s.cost(): Cannot retrieve cost: %s", re->vcl_name,
if ((err = vre2_cost(vre2, &cost)) != NULL) {
VERR(ctx, "%s.cost(): Cannot retrieve cost: %s", context,
err);
return (-1);
}
return cost;
}
/*
 * Object method <regex>.cost(): validate the context and the regex object,
 * then delegate to cost(), using the object's VCL name as the label for
 * any error message.
 */
VCL_INT
vmod_regex_cost(VRT_CTX, struct vmod_re2_regex *re)
{
	VCL_INT result;

	CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
	CHECK_OBJ_NOTNULL(re, VMOD_RE2_REGEX_MAGIC);

	result = cost(ctx, re->vre2, re->vcl_name);
	return (result);
}
/* Regex function interface */
#define ERR_PREFIX "re2.match(pattern=\"%.40s\", text=\"%.40s\"): "
......@@ -756,6 +762,31 @@ vmod_quotemeta(VRT_CTX, VCL_STRING unquoted, VCL_STRING fallback)
#undef ERR_PREFIX
/*
 * Function re2.cost(): compile pattern with the given regex options,
 * return its cost as computed by cost(), and release the temporary
 * regex again.
 *
 * The option arguments passed to vre2_init() (utf8, posix_syntax, ...,
 * one_line) are not declared here; presumably they are the parameters
 * supplied by the REGEX_OPTS macro.
 *
 * Returns -1 after logging an error via VERR if the pattern cannot be
 * compiled; otherwise returns whatever cost() returns (which is itself
 * -1 if the cost cannot be retrieved).
 */
VCL_INT
vmod_cost(VRT_CTX, VCL_STRING pattern, REGEX_OPTS)
{
	vre2 *vre2 = NULL;
	const char *err;
	VCL_INT kost;

	CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);

	/*
	 * A VCL_STRING may be NULL (e.g. an unset header). Passing NULL to
	 * vre2_init() or to the "%.40s" conversion below is undefined
	 * behavior, so treat it as the empty pattern.
	 */
	if (pattern == NULL)
		pattern = "";

	err = vre2_init(&vre2, pattern, utf8, posix_syntax, longest_match,
			max_mem, literal, never_nl, dot_nl, never_capture,
			case_sensitive, perl_classes, word_boundary,
			one_line);
	if (err != NULL) {
		/* Only the first 40 bytes of the pattern are logged. */
		VERR(ctx, "re2.cost(\"%.40s\"): Cannot compile: %s", pattern,
		     err);
		/* vre2_fini() is called on the failure path as well. */
		vre2_fini(&vre2);
		return (-1);
	}

	kost = cost(ctx, vre2, "re2");
	vre2_fini(&vre2);
	return (kost);
}
VCL_STRING
vmod_version(const struct vrt_ctx *ctx __attribute__((unused)))
{
......
......@@ -40,6 +40,7 @@ SYNOPSIS
[, <regex options>])
STRING re2.extract(STRING pattern, STRING text, STRING rewrite
[, <regex options>])
INT re2.cost(STRING pattern [, <regex options>])
# set object interface
new OBJECT = re2.set([ENUM anchor] [, <regex options>])
......@@ -701,6 +702,23 @@ Example::
set beresp.http.X-Query = re2.extract(beresp.http.X-Params, bereq.url,
"\1:\2");
$Function INT cost(STRING pattern, BOOL utf8=0, BOOL posix_syntax=0,
BOOL longest_match=0, INT max_mem=8388608, BOOL literal=0,
BOOL never_nl=0, BOOL dot_nl=0, BOOL never_capture=0,
BOOL case_sensitive=1, BOOL perl_classes=0,
BOOL word_boundary=0, BOOL one_line=0)
Like the ``.cost()`` method above, return a numeric measurement > 0
from the RE2 library for ``pattern`` with the given options. More
complex regexen have a higher cost than less complex regexen.
Fails and returns -1 if ``pattern`` cannot be compiled.
Example::
std.log("simple cost=" + re2.cost("simple")
+ " complex cost=" + re2.cost("complex{1,128}"));
$Object set(ENUM { none, start, both } anchor="none", BOOL utf8=0,
BOOL posix_syntax=0, BOOL longest_match=0, INT max_mem=8388608,
BOOL literal=0, BOOL never_nl=0, BOOL dot_nl=0,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment