Commit 4c0baa24 authored by Geoff Simmons

add support for RE2 options, and take advantage of never_capture to
avoid using workspace for capture data
parent 917c66d3
......@@ -15,6 +15,7 @@ varnish v1 -vcl+backend {
new frobnitz = re2.regex("(frob)(nitz)");
new barbaz = re2.regex("(bar)(baz)");
new azbc = re2.regex("(a|(z))(bc)");
new never = re2.regex("(bar)(baz)", never_capture=true);
}
sub vcl_deliver {
......@@ -55,6 +56,17 @@ varnish v1 -vcl+backend {
} else {
set resp.http.abc = "fail";
}
if (never.match(resp.http.foo)) {
set resp.http.never = "match";
}
/*
* backrefs always fail, including backref 0, when
* never_capture=true
*/
set resp.http.never0 = never.backref(0, "fallback0");
set resp.http.never1 = never.backref(1, "fallback1");
set resp.http.never2 = never.backref(2, "fallback2");
}
} -start
......@@ -79,6 +91,10 @@ client c1 {
expect resp.http.abc1 == "a"
expect resp.http.abc2 == "none"
expect resp.http.abc3 == "bc"
expect resp.http.never == "match"
expect resp.http.never0 == "fallback0"
expect resp.http.never1 == "fallback1"
expect resp.http.never2 == "fallback2"
} -run
logexpect l1 -v v1 -d 1 -g vxid -q "VCL_Error" {
......@@ -91,5 +107,9 @@ logexpect l1 -v v1 -d 1 -g vxid -q "VCL_Error" {
# would have also failed due to failing prior match
expect * = VCL_Error "^vmod re2 error: frobnitz\.backref\(3, \"fallback3\"\): backref out of range \(max 3\)$"
expect * = VCL_Error "^vmod re2 error: never\.backref\(0, \"fallback0\"\): never_capture is true for object never$"
expect * = VCL_Error "^vmod re2 error: never\.backref\(1, \"fallback1\"\): never_capture is true for object never$"
expect * = VCL_Error "^vmod re2 error: never\.backref\(2, \"fallback2\"\): never_capture is true for object never$"
expect * = End
} -run
......@@ -55,6 +55,7 @@ struct vmod_re2_regex {
char *vcl_name;
pthread_key_t matchk;
int ngroups;
unsigned never_capture;
};
static char c;
......@@ -90,7 +91,12 @@ init_matchsz(void)
VCL_VOID
vmod_regex__init(const struct vrt_ctx *ctx, struct vmod_re2_regex **rep,
const char *vcl_name, VCL_STRING pattern)
const char *vcl_name, VCL_STRING pattern, VCL_BOOL utf8,
VCL_BOOL posix_syntax, VCL_BOOL longest_match, VCL_INT max_mem,
VCL_BOOL literal, VCL_BOOL never_nl, VCL_BOOL dot_nl,
VCL_BOOL never_capture, VCL_BOOL case_sensitive,
VCL_BOOL perl_classes, VCL_BOOL word_boundary,
VCL_BOOL one_line)
{
struct vmod_re2_regex *re;
const char *err;
......@@ -105,17 +111,23 @@ vmod_regex__init(const struct vrt_ctx *ctx, struct vmod_re2_regex **rep,
*rep = re;
AZ(pthread_key_create(&re->matchk, NULL));
if ((err = vre2_init(&re->vre2, pattern)) != NULL) {
if ((err = vre2_init(&re->vre2, pattern, utf8, posix_syntax,
longest_match, max_mem, literal, never_nl, dot_nl,
never_capture, case_sensitive, perl_classes,
word_boundary, one_line)) != NULL) {
VERR(ctx, "Cannot compile '%s' in %s constructor: %s", pattern,
vcl_name, err);
return;
}
if ((err = vre2_ngroups(re->vre2, &re->ngroups)) != NULL) {
VERR(ctx, "Cannot obtain number of capturing groups in %s "
"constructor: %s", vcl_name, err);
return;
if (! never_capture) {
if ((err = vre2_ngroups(re->vre2, &re->ngroups)) != NULL) {
VERR(ctx, "Cannot obtain number of capturing groups in "
"%s constructor: %s", vcl_name, err);
return;
}
assert(re->ngroups >= 0);
}
assert(re->ngroups >= 0);
re->never_capture = never_capture;
re->vcl_name = strdup(vcl_name);
AZ(pthread_once(&sz_init_once, init_matchsz));
}
......@@ -139,9 +151,9 @@ VCL_BOOL
vmod_regex_match(const struct vrt_ctx *ctx, struct vmod_re2_regex *re,
VCL_STRING subject)
{
int match = 0;
int match = 0, ngroups = 0;
const char *err;
char *text;
char *text = (void *) subject;
void *group = NULL;
CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
......@@ -153,13 +165,14 @@ vmod_regex_match(const struct vrt_ctx *ctx, struct vmod_re2_regex *re,
AZ(pthread_setspecific(re->matchk, match_failed));
if ((text = WS_Copy(ctx->ws, subject, -1)) == NULL) {
VERR(ctx, ERR_PREFIX "insufficient workspace to copy subject",
re->vcl_name, subject);
return 0;
}
if (re->ngroups >= 0) {
if ((group = WS_Alloc(ctx->ws, (re->ngroups + 1) * match_sz))
if (! re->never_capture) {
ngroups = re->ngroups + 1;
if ((text = WS_Copy(ctx->ws, subject, -1)) == NULL) {
VERR(ctx, ERR_PREFIX "insufficient workspace to copy "
"subject", re->vcl_name, subject);
return 0;
}
if ((group = WS_Alloc(ctx->ws, ngroups * match_sz))
== NULL) {
VERR(ctx, ERR_PREFIX "insufficient workspace to "
"allocate match data", re->vcl_name, subject);
......@@ -168,17 +181,19 @@ vmod_regex_match(const struct vrt_ctx *ctx, struct vmod_re2_regex *re,
}
}
if ((err = vre2_match(re->vre2, text, &match, re->ngroups, group))
if ((err = vre2_match(re->vre2, text, &match, ngroups, group))
!= NULL) {
VERR(ctx, ERR_PREFIX "%s", re->vcl_name, subject, err);
WS_Reset(ctx->ws, text);
return 0;
}
if (match)
AZ(pthread_setspecific(re->matchk, group));
else
WS_Reset(ctx->ws, text);
if (! re->never_capture) {
if (match)
AZ(pthread_setspecific(re->matchk, group));
else
WS_Reset(ctx->ws, text);
}
return match;
#undef ERR_PREFIX
......@@ -200,6 +215,11 @@ vmod_regex_backref(VRT_CTX, struct vmod_re2_regex *re, VCL_INT refnum,
#define ERR_PREFIX "%s.backref(%ld, \"%s\"): "
if (re->never_capture) {
VERR(ctx, ERR_PREFIX "never_capture is true for object %s",
re->vcl_name, refnum, fallback, re->vcl_name);
return fallback;
}
if (refnum > re->ngroups) {
VERR(ctx, ERR_PREFIX "backref out of range (max %d)",
re->vcl_name, refnum, fallback, re->ngroups);
......
......@@ -9,7 +9,11 @@
$Module re2 3 access the Google RE2 regular expression engine
$Object regex(STRING pattern)
$Object regex(STRING pattern, BOOL utf8=0, BOOL posix_syntax=0,
BOOL longest_match=0, INT max_mem=8388608, BOOL literal=0,
BOOL never_nl=0, BOOL dot_nl=0, BOOL never_capture=0,
BOOL case_sensitive=1, BOOL perl_classes=0,
BOOL word_boundary=0, BOOL one_line=0)
$Method BOOL .match(STRING)
......
......@@ -42,8 +42,8 @@
using namespace std;
vre2::vre2(const char *pattern) {
re_ = new RE2(pattern, RE2::Quiet);
vre2::vre2(const char *pattern, RE2::Options * const opt) {
re_ = new RE2(pattern, *opt);
if (!re_->ok())
throw runtime_error(re_->error());
}
......@@ -59,7 +59,7 @@ inline bool
vre2::match(const char *subject, const int ngroups, StringPiece* groups) const
{
return re_->Match(subject, 0, strlen(subject), RE2::UNANCHORED,
groups, ngroups + 1);
groups, ngroups);
}
inline const int
......@@ -69,10 +69,31 @@ vre2::ngroups() const
}
const char *
vre2_init(vre2 **vre2p, const char *pattern)
vre2_init(vre2 **vre2p, const char *pattern, unsigned utf8,
unsigned posix_syntax, unsigned longest_match, long max_mem,
unsigned literal, unsigned never_nl, unsigned dot_nl,
unsigned never_capture, unsigned case_sensitive, unsigned perl_classes,
unsigned word_boundary, unsigned one_line)
{
try {
*vre2p = new vre2(pattern);
RE2::Options opt;
opt.set_log_errors(false);
if (utf8)
opt.set_encoding(RE2::Options::EncodingUTF8);
else
opt.set_encoding(RE2::Options::EncodingLatin1);
opt.set_posix_syntax(posix_syntax);
opt.set_longest_match(longest_match);
opt.set_max_mem(max_mem);
opt.set_literal(literal);
opt.set_never_nl(never_nl);
opt.set_dot_nl(dot_nl);
opt.set_never_capture(never_capture);
opt.set_case_sensitive(case_sensitive);
opt.set_perl_classes(perl_classes);
opt.set_word_boundary(word_boundary);
opt.set_one_line(one_line);
*vre2p = new vre2(pattern, &opt);
return NULL;
}
CATCHALL
......
......@@ -41,7 +41,7 @@ private:
RE2* re_;
public:
vre2(const char *pattern);
vre2(const char *pattern, RE2::Options * const opt);
virtual ~vre2();
bool match(const char *subject, int ngroups, StringPiece* groups) const;
const int ngroups() const;
......@@ -54,7 +54,13 @@ typedef struct vre2 vre2;
extern "C" {
#endif
const char *vre2_init(vre2 **vre2, const char * pattern);
const char *vre2_init(vre2 **vre2, const char * pattern, unsigned utf8,
unsigned posix_syntax, unsigned longest_match,
long max_mem, unsigned literal,
unsigned never_nl, unsigned dot_nl,
unsigned never_capture, unsigned case_sensitive,
unsigned perl_classes, unsigned word_boundary,
unsigned one_line);
const char *vre2_fini(vre2 **vre2);
const size_t vre2_matchsz(void);
const char *vre2_ngroups(vre2 *vre2, int * const ngroups);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment