Commit 12719aa3 authored by Geoff Simmons

Add the .cost() method.

parent bdcdc921
...@@ -37,6 +37,7 @@ SYNOPSIS ...@@ -37,6 +37,7 @@ SYNOPSIS
STRING <obj>.sub(STRING text, STRING rewrite) STRING <obj>.sub(STRING text, STRING rewrite)
STRING <obj>.suball(STRING text, STRING rewrite) STRING <obj>.suball(STRING text, STRING rewrite)
STRING <obj>.extract(STRING text, STRING rewrite) STRING <obj>.extract(STRING text, STRING rewrite)
INT <obj>.cost()
# regex function interface # regex function interface
BOOL re2.match(STRING pattern, STRING subject [, <regex options>]) BOOL re2.match(STRING pattern, STRING subject [, <regex options>])
...@@ -606,6 +607,23 @@ Example:: ...@@ -606,6 +607,23 @@ Example::
set resp.http.X-UUCP = email.extract("boris@kremvax.ru", "\2!\1"); set resp.http.X-UUCP = email.extract("boris@kremvax.ru", "\2!\1");
} }
.. _func_regex.cost:
INT xregex.cost()
-----------------
Return a numeric measurement > 0 for this regex object from the RE2
library. According to the RE2 documentation:
... a very approximate measure of a regexp's "cost". Larger numbers
are more expensive than smaller numbers.
The absolute numeric values are opaque and not relevant, but they are
meaningful relative to one another -- more complex regexen have a
higher cost than less complex regexen. This may be useful during
development and optimization of regular expressions.
If the cost cannot be retrieved, an error message is emitted and -1 is
returned.
regex functional interface regex functional interface
========================== ==========================
......
# looks like -*- vcl -*-
# Checks that the cost() method of regex objects returns positive integers
# whose relative order follows the complexity of the patterns.
varnishtest "cost() function"
varnish v1 -vcl {
import ${vmod_re2};
backend b { .host = "${bad_ip}"; }
sub vcl_init {
# Tests from re2 testing/re2_test.cc
# Three patterns of increasing complexity, one regex object for each.
new simple = re2.regex("simple regexp");
new medium = re2.regex("medium.*regexp");
new complex = re2.regex("complex.{1,128}regexp");
}
sub vcl_recv {
# Every request is answered with a synthetic response.
return(synth(200));
}
sub vcl_synth {
# Expose the cost of each regex as a response header for the client checks.
set resp.http.C-Simple = simple.cost();
set resp.http.C-Medium = medium.cost();
set resp.http.C-Complex = complex.cost();
}
} -start
client c1 {
txreq
rxresp
expect resp.status == 200
# Each cost header must consist of decimal digits only.
expect resp.http.C-Simple ~ {^\d+$}
expect resp.http.C-Medium ~ {^\d+$}
expect resp.http.C-Complex ~ {^\d+$}
# Costs must be positive and ordered: simple, then medium, then complex.
expect resp.http.C-Simple > 0
expect resp.http.C-Medium > resp.http.C-Simple
expect resp.http.C-Complex > resp.http.C-Medium
} -run
...@@ -477,6 +477,23 @@ vmod_regex_extract(VRT_CTX, struct vmod_re2_regex *re, VCL_STRING text, ...@@ -477,6 +477,23 @@ vmod_regex_extract(VRT_CTX, struct vmod_re2_regex *re, VCL_STRING text,
return rewrite_method(ctx, EXTRACT, re, text, rewrite, fallback); return rewrite_method(ctx, EXTRACT, re, text, rewrite, fallback);
} }
/*
 * <obj>.cost(): return the cost that vre2_cost() reports for this regex
 * object. If vre2_cost() hands back an error message, it is reported via
 * VERR together with the object's VCL name, and -1 is returned instead.
 */
VCL_INT
vmod_regex_cost(VRT_CTX, struct vmod_re2_regex *re)
{
	const char *errmsg;
	int prog_cost;

	CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
	CHECK_OBJ_NOTNULL(re, VMOD_RE2_REGEX_MAGIC);

	/* A NULL return from vre2_cost() means prog_cost has been filled in. */
	errmsg = vre2_cost(re->vre2, &prog_cost);
	if (errmsg == NULL)
		return (prog_cost);

	VERR(ctx, "%s.cost(): Cannot retrieve cost: %s", re->vcl_name,
	     errmsg);
	return (-1);
}
/* Regex function interface */ /* Regex function interface */
#define ERR_PREFIX "re2.match(pattern=\"%.40s\", text=\"%.40s\"): " #define ERR_PREFIX "re2.match(pattern=\"%.40s\", text=\"%.40s\"): "
......
...@@ -26,6 +26,7 @@ SYNOPSIS ...@@ -26,6 +26,7 @@ SYNOPSIS
STRING <obj>.sub(STRING text, STRING rewrite) STRING <obj>.sub(STRING text, STRING rewrite)
STRING <obj>.suball(STRING text, STRING rewrite) STRING <obj>.suball(STRING text, STRING rewrite)
STRING <obj>.extract(STRING text, STRING rewrite) STRING <obj>.extract(STRING text, STRING rewrite)
INT <obj>.cost()
# regex function interface # regex function interface
BOOL re2.match(STRING pattern, STRING subject [, <regex options>]) BOOL re2.match(STRING pattern, STRING subject [, <regex options>])
...@@ -520,6 +521,19 @@ Example:: ...@@ -520,6 +521,19 @@ Example::
set resp.http.X-UUCP = email.extract("boris@kremvax.ru", "\2!\1"); set resp.http.X-UUCP = email.extract("boris@kremvax.ru", "\2!\1");
} }
$Method INT .cost()
Return a numeric measurement > 0 for this regex object from the RE2
library. According to the RE2 documentation:
... a very approximate measure of a regexp's "cost". Larger numbers
are more expensive than smaller numbers.
The absolute numeric values are opaque and not relevant, but they are
meaningful relative to one another -- more complex regexen have a
higher cost than less complex regexen. This may be useful during
development and optimization of regular expressions.
If the cost cannot be retrieved, an error message is emitted and -1 is
returned.
regex functional interface regex functional interface
========================== ==========================
......
...@@ -99,6 +99,12 @@ vre2::extract(string *out, const char * const text, const char * const rewrite) ...@@ -99,6 +99,12 @@ vre2::extract(string *out, const char * const text, const char * const rewrite)
return RE2::Extract(text, *re_, rewrite, out); return RE2::Extract(text, *re_, rewrite, out);
} }
// Returns the value of RE2::ProgramSize() for the wrapped regex, which
// the C interface exposes as the regex's "cost".
inline int
vre2::size() const
{
	const int program_size = re_->ProgramSize();
	return program_size;
}
const char * const char *
vre2_init(vre2 **vre2p, const char *pattern, unsigned utf8, vre2_init(vre2 **vre2p, const char *pattern, unsigned utf8,
unsigned posix_syntax, unsigned longest_match, long max_mem, unsigned posix_syntax, unsigned longest_match, long max_mem,
...@@ -236,6 +242,16 @@ vre2_quotemeta(const char * const unquoted, char * const dest, ...@@ -236,6 +242,16 @@ vre2_quotemeta(const char * const unquoted, char * const dest,
CATCHALL CATCHALL
} }
/*
 * C-callable wrapper around vre2::size(): stores the regex's program size
 * in *cost and returns NULL on success. Any exception is handled by the
 * CATCHALL macro (presumably by returning an error message string, which
 * is how the caller treats a non-NULL result -- confirm against the macro
 * definition). *cost is left untouched if size() throws.
 */
const char *
vre2_cost(vre2 *vre2, int *cost)
{
	try {
		const int program_size = vre2->size();
		*cost = program_size;
		return NULL;
	}
	CATCHALL
}
const char * const char *
vre2_fini(vre2 **vre2) vre2_fini(vre2 **vre2)
{ {
......
...@@ -53,6 +53,7 @@ public: ...@@ -53,6 +53,7 @@ public:
bool global_replace(string *text, const char * const rewrite) const; bool global_replace(string *text, const char * const rewrite) const;
bool extract(string *out, const char * const text, bool extract(string *out, const char * const text,
const char * const rewrite) const; const char * const rewrite) const;
int size() const;
}; };
#else #else
typedef struct vre2 vre2; typedef struct vre2 vre2;
...@@ -90,6 +91,7 @@ extern "C" { ...@@ -90,6 +91,7 @@ extern "C" {
const char * const rewrite, const char * const rewrite,
char * const dest, const size_t bytes, char * const dest, const size_t bytes,
int * const match, size_t * const len); int * const match, size_t * const len);
/*
 * Store the cost (program size) of the compiled regex in *cost.
 * Returns NULL on success; a non-NULL return is an error message.
 */
const char *vre2_cost(vre2 *vre2, int *cost);
const char *vre2_quotemeta(const char * const unquoted, const char *vre2_quotemeta(const char * const unquoted,
char * const dest, const size_t bytes, char * const dest, const size_t bytes,
size_t * const len); size_t * const len);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment