Commit 12719aa3 authored by Geoff Simmons's avatar Geoff Simmons

Add the .cost() method.

parent bdcdc921
......@@ -37,6 +37,7 @@ SYNOPSIS
STRING <obj>.sub(STRING text, STRING rewrite)
STRING <obj>.suball(STRING text, STRING rewrite)
STRING <obj>.extract(STRING text, STRING rewrite)
INT <obj>.cost()
# regex function interface
BOOL re2.match(STRING pattern, STRING subject [, <regex options>])
......@@ -606,6 +607,23 @@ Example::
set resp.http.X-UUCP = email.extract("boris@kremvax.ru", "\2!\1");
}
.. _func_regex.cost:
INT xregex.cost()
-----------------
Return a numeric measurement > 0 for this regex object from the RE2
library. According to the RE2 documentation:
... a very approximate measure of a regexp's "cost". Larger numbers
are more expensive than smaller numbers.
The absolute numeric values are opaque and not relevant, but they are
meaningful relative to one another -- more complex regexen have a
higher cost than less complex regexen. This may be useful during
development and optimization of regular expressions.
regex functional interface
==========================
......
# looks like -*- vcl -*-
varnishtest "cost() function"
# Checks that <obj>.cost() yields decimal integers that are positive and
# that grow from the simple to the medium to the complex pattern.
varnish v1 -vcl {
import ${vmod_re2};
backend b { .host = "${bad_ip}"; }
sub vcl_init {
# Tests from re2 testing/re2_test.cc
new simple = re2.regex("simple regexp");
new medium = re2.regex("medium.*regexp");
new complex = re2.regex("complex.{1,128}regexp");
}
sub vcl_recv {
# Every request is answered with a synthetic 200 response.
return(synth(200));
}
sub vcl_synth {
# Publish each object's cost as a response header for the client to check.
set resp.http.C-Simple = simple.cost();
set resp.http.C-Medium = medium.cost();
set resp.http.C-Complex = complex.cost();
}
} -start
client c1 {
txreq
rxresp
expect resp.status == 200
# Each header must consist of decimal digits only.
expect resp.http.C-Simple ~ {^\d+$}
expect resp.http.C-Medium ~ {^\d+$}
expect resp.http.C-Complex ~ {^\d+$}
# Costs must be positive and strictly ordered: simple < medium < complex.
expect resp.http.C-Simple > 0
expect resp.http.C-Medium > resp.http.C-Simple
expect resp.http.C-Complex > resp.http.C-Medium
} -run
......@@ -477,6 +477,23 @@ vmod_regex_extract(VRT_CTX, struct vmod_re2_regex *re, VCL_STRING text,
return rewrite_method(ctx, EXTRACT, re, text, rewrite, fallback);
}
/*
 * VCL method <obj>.cost().
 *
 * Asks vre2_cost() for the cost of the regex object and returns it.
 * If vre2_cost() hands back an error string, the error is reported
 * through VERR() and -1 is returned instead.
 */
VCL_INT
vmod_regex_cost(VRT_CTX, struct vmod_re2_regex *re)
{
	const char *failure;
	int measured;

	CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
	CHECK_OBJ_NOTNULL(re, VMOD_RE2_REGEX_MAGIC);

	failure = vre2_cost(re->vre2, &measured);
	if (failure == NULL) {
		/* Success: the wrapper filled in the measured cost. */
		return measured;
	}

	VERR(ctx, "%s.cost(): Cannot retrieve cost: %s", re->vcl_name,
	     failure);
	return (-1);
}
/* Regex function interface */
#define ERR_PREFIX "re2.match(pattern=\"%.40s\", text=\"%.40s\"): "
......
......@@ -26,6 +26,7 @@ SYNOPSIS
STRING <obj>.sub(STRING text, STRING rewrite)
STRING <obj>.suball(STRING text, STRING rewrite)
STRING <obj>.extract(STRING text, STRING rewrite)
INT <obj>.cost()
# regex function interface
BOOL re2.match(STRING pattern, STRING subject [, <regex options>])
......@@ -520,6 +521,19 @@ Example::
set resp.http.X-UUCP = email.extract("boris@kremvax.ru", "\2!\1");
}
$Method INT .cost()
Return a numeric measurement > 0 for this regex object from the RE2
library. According to the RE2 documentation:
... a very approximate measure of a regexp's "cost". Larger numbers
are more expensive than smaller numbers.
The absolute numeric values are opaque and not relevant, but they are
meaningful relative to one another -- more complex regexen have a
higher cost than less complex regexen. This may be useful during
development and optimization of regular expressions.
regex functional interface
==========================
......
......@@ -99,6 +99,12 @@ vre2::extract(string *out, const char * const text, const char * const rewrite)
return RE2::Extract(text, *re_, rewrite, out);
}
/*
 * Return the value of ProgramSize() for the regex held in re_.
 * The documentation in this change describes it as a very approximate
 * measure of the regex's "cost".
 */
inline int
vre2::size() const
{
	const int program_size = re_->ProgramSize();

	return program_size;
}
const char *
vre2_init(vre2 **vre2p, const char *pattern, unsigned utf8,
unsigned posix_syntax, unsigned longest_match, long max_mem,
......@@ -236,6 +242,16 @@ vre2_quotemeta(const char * const unquoted, char * const dest,
CATCHALL
}
/*
 * C-callable wrapper around vre2::size().
 *
 * On success the size is written to *cost and NULL is returned.
 * CATCHALL is a macro defined elsewhere in this file; presumably it
 * supplies the catch handlers that turn an exception into a returned
 * error string -- TODO confirm against its definition.
 */
const char *
vre2_cost(vre2 *vre2, int *cost)
{
	try {
		const int program_size = vre2->size();

		*cost = program_size;
		return NULL;
	}
	CATCHALL
}
const char *
vre2_fini(vre2 **vre2)
{
......
......@@ -53,6 +53,7 @@ public:
bool global_replace(string *text, const char * const rewrite) const;
bool extract(string *out, const char * const text,
const char * const rewrite) const;
/* Returns re_->ProgramSize(), used as the regex's "cost". */
int size() const;
};
#else
typedef struct vre2 vre2;
......@@ -90,6 +91,7 @@ extern "C" {
const char * const rewrite,
char * const dest, const size_t bytes,
int * const match, size_t * const len);
/*
 * Stores the regex's cost in *cost and returns NULL on success; a
 * non-NULL return is treated by the caller as an error message.
 */
const char *vre2_cost(vre2 *vre2, int *cost);
const char *vre2_quotemeta(const char * const unquoted,
char * const dest, const size_t bytes,
size_t * const len);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment