Commit e65ac0a6 authored by Geoff Simmons

first working version of the backref method

parent 0e3794c3
...@@ -28,6 +28,7 @@ CONTENTS ...@@ -28,6 +28,7 @@ CONTENTS
======== ========
* Object regex * Object regex
* STRING regex.backref(INT, STRING)
* BOOL regex.match(STRING) * BOOL regex.match(STRING)
* STRING version() * STRING version()
...@@ -45,6 +46,14 @@ BOOL regex.match(STRING) ...@@ -45,6 +46,14 @@ BOOL regex.match(STRING)
Prototype Prototype
BOOL regex.match(STRING) BOOL regex.match(STRING)
.. _func_regex.backref:
STRING regex.backref(INT, STRING)
---------------------------------
Prototype
STRING regex.backref(INT ref, STRING fallback)
.. _func_version: .. _func_version:
STRING version() STRING version()
......
# looks like -*- vcl -*-
varnishtest "cached compiled regexen"
# Exercise regex.backref() after a successful regex.match().
# The same client (c1) is run twice, to test caching of compiled regexen.
varnish v1 -vcl+backend {
import re2 from "${vmod_topbuild}/src/.libs/libvmod_re2.so";
backend b { .host = "${bad_ip}"; }
sub vcl_init {
# Two capturing groups; the client below checks backrefs 0, 1 and 2.
new barbaz = re2.regex("(bar)(baz)");
}
sub vcl_recv {
# Every request is answered from vcl_synth.
return(synth(200));
}
sub vcl_synth {
if (barbaz.match("barbaz")) {
# The second argument is the fallback string for each backref.
set resp.http.foo0 = barbaz.backref(0, "error0");
set resp.http.foo1 = barbaz.backref(1, "error1");
set resp.http.foo2 = barbaz.backref(2, "error2");
} else {
# A failed match is signalled with status 999, which the client's
# status expectation would reject.
set resp.status = 999;
}
}
} -start
client c1 {
txreq
rxresp
expect resp.status == 200
# Backref 0 is expected to be the whole match, 1 and 2 the two groups.
expect resp.http.foo0 == "barbaz"
expect resp.http.foo1 == "bar"
expect resp.http.foo2 == "baz"
}
client c1 -run
client c1 -run
...@@ -51,24 +51,17 @@ ...@@ -51,24 +51,17 @@
struct vmod_re2_regex { struct vmod_re2_regex {
unsigned magic; unsigned magic;
#define VMOD_RE2_REGEX_MAGIC 0x5c3f6f24 #define VMOD_RE2_REGEX_MAGIC 0x5c3f6f24
pthread_key_t ovk;
vre2 *vre2; vre2 *vre2;
char *vcl_name; char *vcl_name;
pthread_key_t matchk;
int ngroups;
}; };
#if 0
typedef struct ov_s {
unsigned magic;
#define OV_MAGIC 0x844bfa39
const char *subject;
int ovector[MAX_OV_USED];
} ov_t;
#endif
#if 0
static char c; static char c;
static const void *match_failed = (void *) &c; static const void *match_failed = (void *) &c;
#endif
static size_t match_sz;
static pthread_once_t sz_init_once = PTHREAD_ONCE_INIT;
static void static void
errmsg(VRT_CTX, const char *fmt, ...) errmsg(VRT_CTX, const char *fmt, ...)
...@@ -89,6 +82,12 @@ errmsg(VRT_CTX, const char *fmt, ...) ...@@ -89,6 +82,12 @@ errmsg(VRT_CTX, const char *fmt, ...)
va_end(args); va_end(args);
} }
/*
 * One-time initializer for the file-level match_sz: stores the value
 * reported by vre2_matchsz(). The regex constructor passes this function
 * to pthread_once() together with sz_init_once.
 */
static void
init_matchsz(void)
{
match_sz = vre2_matchsz();
}
VCL_VOID VCL_VOID
vmod_regex__init(const struct vrt_ctx *ctx, struct vmod_re2_regex **rep, vmod_regex__init(const struct vrt_ctx *ctx, struct vmod_re2_regex **rep,
const char *vcl_name, VCL_STRING pattern) const char *vcl_name, VCL_STRING pattern)
...@@ -105,14 +104,20 @@ vmod_regex__init(const struct vrt_ctx *ctx, struct vmod_re2_regex **rep, ...@@ -105,14 +104,20 @@ vmod_regex__init(const struct vrt_ctx *ctx, struct vmod_re2_regex **rep,
AN(re); AN(re);
*rep = re; *rep = re;
AZ(pthread_key_create(&re->ovk, NULL)); AZ(pthread_key_create(&re->matchk, NULL));
err = vre2_init(&re->vre2, pattern); if ((err = vre2_init(&re->vre2, pattern)) != NULL) {
if (err != NULL) {
VERR(ctx, "Cannot compile '%s' in %s constructor: %s", pattern, VERR(ctx, "Cannot compile '%s' in %s constructor: %s", pattern,
vcl_name, err); vcl_name, err);
return; return;
} }
if ((err = vre2_ngroups(re->vre2, &re->ngroups)) != NULL) {
VERR(ctx, "Cannot obtain number of capturing groups in %s "
"constructor: %s", vcl_name, err);
return;
}
assert(re->ngroups >= 0);
re->vcl_name = strdup(vcl_name); re->vcl_name = strdup(vcl_name);
AZ(pthread_once(&sz_init_once, init_matchsz));
} }
VCL_VOID VCL_VOID
...@@ -124,7 +129,7 @@ vmod_regex__fini(struct vmod_re2_regex **rep) ...@@ -124,7 +129,7 @@ vmod_regex__fini(struct vmod_re2_regex **rep)
*rep = NULL; *rep = NULL;
CHECK_OBJ_NOTNULL(re, VMOD_RE2_REGEX_MAGIC); CHECK_OBJ_NOTNULL(re, VMOD_RE2_REGEX_MAGIC);
vre2_fini(&re->vre2); vre2_fini(&re->vre2);
AZ(pthread_key_delete(re->ovk)); AZ(pthread_key_delete(re->matchk));
if (re->vcl_name != NULL) if (re->vcl_name != NULL)
free(re->vcl_name); free(re->vcl_name);
FREE_OBJ(re); FREE_OBJ(re);
...@@ -136,14 +141,99 @@ vmod_regex_match(const struct vrt_ctx *ctx, struct vmod_re2_regex *re, ...@@ -136,14 +141,99 @@ vmod_regex_match(const struct vrt_ctx *ctx, struct vmod_re2_regex *re,
{ {
int match = 0; int match = 0;
const char *err; const char *err;
char *text;
void *group = NULL;
CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC); CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
CHECK_OBJ_NOTNULL(re, VMOD_RE2_REGEX_MAGIC); CHECK_OBJ_NOTNULL(re, VMOD_RE2_REGEX_MAGIC);
if ((err = vre2_match(re->vre2, subject, &match)) != NULL) { if (subject == NULL)
VERR(ctx, "%s.match(\"%s\"): %s", re->vcl_name, subject, err); subject = "";
#define ERR_PREFIX "%s.match(\"%s\"): "
AZ(pthread_setspecific(re->matchk, match_failed));
if ((text = WS_Copy(ctx->ws, subject, -1)) == NULL) {
VERR(ctx, ERR_PREFIX "insufficient workspace to copy subject",
re->vcl_name, subject);
return 0;
}
if (re->ngroups >= 0) {
if ((group = WS_Alloc(ctx->ws, (re->ngroups + 1) * match_sz))
== NULL) {
VERR(ctx, ERR_PREFIX "insufficient workspace to "
"allocate match data", re->vcl_name, subject);
WS_Reset(ctx->ws, text);
return 0;
}
}
if ((err = vre2_match(re->vre2, text, &match, re->ngroups, group))
!= NULL) {
VERR(ctx, ERR_PREFIX "%s", re->vcl_name, subject, err);
WS_Reset(ctx->ws, text);
return 0; return 0;
} }
if (match)
AZ(pthread_setspecific(re->matchk, group));
else if (group != NULL)
WS_Reset(ctx->ws, group);
return match; return match;
#undef ERR_PREFIX
}
/*
 * Return the text of capturing group refnum, using the match data stored
 * for this regex object under the thread-specific key re->matchk.
 *
 * fallback is returned when:
 *   - refnum is greater than re->ngroups (logged via VERR),
 *   - no value is stored under re->matchk for this thread (logged),
 *   - the stored value is the match_failed sentinel (not logged),
 *   - vre2_capture() reports an error (logged),
 *   - the workspace copy of the capture fails (logged).
 * A capture of length 0 yields the literal "".
 * Otherwise the result is a NUL-terminated copy allocated from ctx->ws.
 *
 * NOTE(review): a NULL fallback or a negative refnum trips an assertion
 * instead of returning fallback -- confirm that this is intended for
 * values that originate in VCL.
 */
VCL_STRING
vmod_regex_backref(VRT_CTX, struct vmod_re2_regex *re, VCL_INT refnum,
VCL_STRING fallback)
{
void *group;
const char *err, *capture;
char *backref;
int len;
CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
CHECK_OBJ_NOTNULL(re, VMOD_RE2_REGEX_MAGIC);
AN(fallback);
assert(refnum >= 0);
/* Common prefix for every message logged by this method. */
#define ERR_PREFIX "%s.backref(%ld, \"%s\"): "
/* refnum 0 is accepted, so the valid range is 0..ngroups inclusive. */
if (refnum > re->ngroups) {
VERR(ctx, ERR_PREFIX "backref out of range (max %d)",
re->vcl_name, refnum, fallback, re->ngroups);
return fallback;
}
group = pthread_getspecific(re->matchk);
if (group == NULL) {
VERR(ctx, ERR_PREFIX "backref called without prior match",
re->vcl_name, refnum, fallback);
return fallback;
}
/* Sentinel stored under the key: return the fallback silently. */
if (group == match_failed)
return fallback;
/*
 * The stored pointer is required to lie between ctx->ws->s and
 * ctx->ws->e.
 * NOTE(review): presumably this can fail if the value was stored while
 * the thread was using a different workspace -- confirm.
 */
assert((char *)group >= ctx->ws->s && (char *)group < ctx->ws->e);
if ((err = vre2_capture(group, (int) refnum, &capture, &len))
!= NULL) {
VERR(ctx, ERR_PREFIX "error retrieving capture: %s",
re->vcl_name, refnum, fallback, err);
return fallback;
}
assert(len >= 0);
if (len == 0)
return "";
/*
 * Copy one byte more than the capture so there is room for the
 * terminator, which is written over that extra byte below.
 */
if ((backref = WS_Copy(ctx->ws, capture, len + 1)) == NULL) {
VERR(ctx, ERR_PREFIX "insufficient workspace for backref",
re->vcl_name, refnum, fallback);
return fallback;
}
backref[len] = '\0';
return backref;
#undef ERR_PREFIX
} }
VCL_STRING VCL_STRING
......
...@@ -13,4 +13,6 @@ $Object regex(STRING pattern) ...@@ -13,4 +13,6 @@ $Object regex(STRING pattern)
$Method BOOL .match(STRING) $Method BOOL .match(STRING)
$Method STRING .backref(INT ref, STRING fallback = "**BACKREF FAILED**")
$Function STRING version() $Function STRING version()
...@@ -43,50 +43,88 @@ ...@@ -43,50 +43,88 @@
using namespace std; using namespace std;
vre2::vre2(const char *pattern) { vre2::vre2(const char *pattern) {
re_ = new RE2(pattern, RE2::Quiet); re_ = new RE2(pattern, RE2::Quiet);
if (!re_->ok()) if (!re_->ok())
throw runtime_error(re_->error()); throw runtime_error(re_->error());
} }
vre2::~vre2() { vre2::~vre2() {
if (re_) { if (re_) {
delete re_; delete re_;
re_ = NULL; re_ = NULL;
} }
} }
bool inline bool
vre2::match(const char *subject) vre2::match(const char *subject, int ngroups, StringPiece* groups)
{ {
return RE2::PartialMatchN(subject, *re_, NULL, 0); return re_->Match(subject, 0, strlen(subject), RE2::UNANCHORED,
groups, ngroups + 1);
}
/* Number of capturing groups in the compiled pattern, as reported by RE2. */
inline int
vre2::ngroups()
{
return re_->NumberOfCapturingGroups();
} }
const char * const char *
vre2_init(vre2 **vre2p, const char *pattern) vre2_init(vre2 **vre2p, const char *pattern)
{ {
try { try {
*vre2p = new vre2(pattern); *vre2p = new vre2(pattern);
return NULL; return NULL;
} }
CATCHALL CATCHALL
}
/*
 * Report the size in bytes of one StringPiece, so that the C side can
 * size storage for match data without seeing the C++ type.
 *
 * NOTE(review): the const qualifier on a by-value return has no effect;
 * if it is ever dropped, the declaration in the header must change too.
 */
const size_t
vre2_matchsz(void)
{
return sizeof(StringPiece);
} }
const char * const char *
vre2_match(vre2 *vre2, const char *subject, int *match) vre2_match(vre2 *vre2, const char *subject, int *match, int ngroups,
void *group)
{ {
try { try {
*match = vre2->match(subject); StringPiece* g = reinterpret_cast<StringPiece *>(group);
return NULL; *match = vre2->match(subject, ngroups, g);
} return NULL;
CATCHALL }
CATCHALL
}
const char *
vre2_capture(void *group, int refnum, const char **capture, int *len)
{
try {
StringPiece* g = reinterpret_cast<StringPiece *>(group);
StringPiece str = g[refnum];
*capture = str.data();
*len = str.length();
return NULL;
}
CATCHALL
}
/*
 * C-callable wrapper around vre2::ngroups(): stores the number of
 * capturing groups in *ngroups and returns NULL. The regex constructor
 * treats a non-NULL return as an error message.
 * Exceptions are handled by the CATCHALL macro, which is defined outside
 * this function.
 */
const char *
vre2_ngroups(vre2 *vre2, int *ngroups)
{
try {
*ngroups = vre2->ngroups();
return NULL;
}
CATCHALL
} }
const char * const char *
vre2_fini(vre2 **vre2) vre2_fini(vre2 **vre2)
{ {
try { try {
delete *vre2; delete *vre2;
return NULL; return NULL;
} }
CATCHALL CATCHALL
} }
...@@ -38,24 +38,30 @@ using namespace re2; ...@@ -38,24 +38,30 @@ using namespace re2;
class vre2 { class vre2 {
private: private:
RE2* re_; RE2* re_;
public: public:
vre2(const char *pattern); vre2(const char *pattern);
virtual ~vre2(); virtual ~vre2();
bool match(const char *subject); bool match(const char *subject, int ngroups, StringPiece* groups);
int ngroups();
}; };
#else #else
typedef struct vre2 vre2; typedef struct vre2 vre2;
#endif #endif
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
const char *vre2_init(vre2 **vre2, const char *pattern); const char *vre2_init(vre2 **vre2, const char *pattern);
const char *vre2_fini(vre2 **vre2); const char *vre2_fini(vre2 **vre2);
const char *vre2_match(vre2 *vre2, const char *subject, int *match); const size_t vre2_matchsz(void);
const char *vre2_ngroups(vre2 *vre2, int *ngroups);
const char *vre2_match(vre2 *vre2, const char *subject, int *match,
int ngroups, void *group);
const char *vre2_capture(void *group, int refnum, const char **capture,
int *len);
#ifdef __cplusplus #ifdef __cplusplus
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment