Commit be49a56f authored by Geoff Simmons's avatar Geoff Simmons

add the extract() method and function

parent 7cc8686d
......@@ -28,10 +28,12 @@ CONTENTS
========
* STRING backref(PRIV_TASK, INT, STRING)
* STRING extract(STRING, STRING, STRING, STRING, BOOL, BOOL, BOOL, INT, BOOL, BOOL, BOOL, BOOL, BOOL, BOOL, BOOL, BOOL)
* BOOL match(PRIV_TASK, STRING, STRING, BOOL, BOOL, BOOL, INT, BOOL, BOOL, BOOL, BOOL, BOOL, BOOL, BOOL, BOOL)
* STRING namedref(PRIV_TASK, STRING, STRING)
* Object regex
* STRING regex.backref(INT, STRING)
* STRING regex.extract(STRING, STRING, STRING)
* BOOL regex.match(STRING)
* STRING regex.namedref(STRING, STRING)
* STRING regex.sub(STRING, STRING, STRING)
......@@ -90,6 +92,14 @@ STRING regex.suball(STRING, STRING, STRING)
Prototype
STRING regex.suball(STRING text, STRING rewrite, STRING fallback)
.. _func_regex.extract:
STRING regex.extract(STRING, STRING, STRING)
--------------------------------------------
Prototype
STRING regex.extract(STRING text, STRING rewrite, STRING fallback)
.. _obj_set:
Object set
......@@ -160,6 +170,14 @@ STRING suball(STRING, STRING, STRING, STRING, BOOL, BOOL, BOOL, INT, BOOL, BOOL,
Prototype
STRING suball(STRING pattern, STRING text, STRING rewrite, STRING fallback, BOOL utf8, BOOL posix_syntax, BOOL longest_match, INT max_mem, BOOL literal, BOOL never_nl, BOOL dot_nl, BOOL never_capture, BOOL case_sensitive, BOOL perl_classes, BOOL word_boundary, BOOL one_line)
.. _func_extract:
STRING extract(STRING, STRING, STRING, STRING, BOOL, BOOL, BOOL, INT, BOOL, BOOL, BOOL, BOOL, BOOL, BOOL, BOOL, BOOL)
---------------------------------------------------------------------------------------------------------------------
Prototype
STRING extract(STRING pattern, STRING text, STRING rewrite, STRING fallback, BOOL utf8, BOOL posix_syntax, BOOL longest_match, INT max_mem, BOOL literal, BOOL never_nl, BOOL dot_nl, BOOL never_capture, BOOL case_sensitive, BOOL perl_classes, BOOL word_boundary, BOOL one_line)
.. _func_version:
STRING version()
......
# looks like -*- vcl -*-
varnishtest "extract() method and function"
# Varnish instance under test. Every request is answered with a synthetic
# 200 response whose headers carry the results of the extract() method and
# function calls, so that the client below can check them.
varnish v1 -vcl {
import re2 from "${vmod_topbuild}/src/.libs/libvmod_re2.so";
backend be { .host = "${bad_ip}"; }
sub vcl_init {
# email: group 1 is everything before an @, group 2 is the run of
# non-dot characters that follows it.
new email = re2.regex("(.*)@([^.]*)");
# dotstar: matches any text; used with the \0 backreference below.
new dotstar = re2.regex(".*");
}
sub vcl_recv {
return(synth(200));
}
sub vcl_synth {
# Each case is run twice: once with the object method, and once with
# the re2.extract function (response headers with an f suffix), which
# takes the pattern as its first argument.
# Tests from re2 testing/re2_test.cc
set resp.http.uucp = email.extract("boris@kremvax.ru", "\2!\1");
set resp.http.quote = dotstar.extract("foo", "'\0'");
set resp.http.uucpf = re2.extract("(.*)@([^.]*)",
"boris@kremvax.ru", "\2!\1");
set resp.http.quotef = re2.extract(".*", "foo", "'\0'");
# Match failure
set resp.http.fail = email.extract("foo", "bar", "fallback");
set resp.http.failf = re2.extract("(.*)@([^.]*)", "foo", "bar",
"fallbackf");
# Undefined fallback
set resp.http.undeffallback
= email.extract("foo", "bar", req.http.undef);
set resp.http.undeffallbackf
= re2.extract(".", "foo", "bar", req.http.undef);
# Undefined pattern in the function
set resp.http.undefpattern
= re2.extract(req.http.undef, "", "", "pattern undef");
# Undefined text
set resp.http.undeftext
= email.extract(req.http.undef, "x", "text undef");
set resp.http.undeftextf
= re2.extract(".", req.http.undef, "x", "text undef");
# Undefined rewrite
set resp.http.undefrewrite
= email.extract("b", req.http.undef, "rewrite undef");
set resp.http.undefrewritef
= re2.extract(".", "b", req.http.undef,"rewrite undef");
# Default fallbacks
# No fallback argument is given and the text does not match, so the
# default fallback strings are expected in the response.
set resp.http.fallback = email.extract("foo", "bar");
set resp.http.fallbackf = re2.extract("(.*)@([^.]*)", "foo",
"bar");
}
} -start
# Sends a single request and checks every response header set in vcl_synth:
# successful extractions, explicit fallbacks on match failure or undefined
# arguments, and the built-in failure strings when the fallback is undefined
# or left at its default.
client c1 {
txreq
rxresp
expect resp.http.uucp == "kremvax!boris"
expect resp.http.quote == "'foo'"
expect resp.http.uucpf == "kremvax!boris"
expect resp.http.quotef == "'foo'"
expect resp.http.fail == "fallback"
expect resp.http.failf == "fallbackf"
expect resp.http.undeffallback == "**EXTRACT METHOD FAILED**"
expect resp.http.undeffallbackf == "**EXTRACT FUNCTION FAILED**"
expect resp.http.undefpattern == "pattern undef"
expect resp.http.undeftext == "text undef"
expect resp.http.undeftextf == "text undef"
expect resp.http.undefrewrite == "rewrite undef"
expect resp.http.undefrewritef == "rewrite undef"
expect resp.http.fallback == "**EXTRACT METHOD FAILED**"
expect resp.http.fallbackf == "**EXTRACT FUNCTION FAILED**"
} -run
# Checks the VCL_Error records logged by v1 for the undefined-argument cases,
# in the order the corresponding calls appear in vcl_synth. The expectations
# are regular expressions: each '.' stands in for a literal parenthesis or
# double quote in the logged message.
logexpect l1 -v v1 -d 1 -g vxid -q "VCL_Error" {
expect 0 * Begin req
expect * = VCL_Error "^vmod re2 error: email.extract..: fallback is undefined$"
expect * = VCL_Error "^vmod re2 error: re2.extract..: fallback is undefined$"
expect * = VCL_Error "^vmod re2 error: re2.extract.pattern=<undefined>, fallback=.pattern undef..: pattern is undefined$"
expect * = VCL_Error "^vmod re2 error: email.extract.text=<undefined>, fallback=.text undef..: text is undefined$"
expect * = VCL_Error "^vmod re2 error: re2.extract.pattern=..., text=<undefined>, fallback=.text undef..: text is undefined$"
expect * = VCL_Error "^vmod re2 error: email.extract.text=.b., rewrite=<undefined>, fallback=.rewrite undef..: rewrite is undefined$"
expect * = VCL_Error "^vmod re2 error: re2.extract.pattern=..., text=.b., rewrite=<undefined>, fallback=.rewrite undef..: rewrite is undefined$"
expect * = End
} -run
......@@ -265,6 +265,35 @@ sub(VRT_CTX, vre2 * restrict vre2, const int all, VCL_STRING text,
#undef ERR_PREFIX
#define ERR_PREFIX "extract(text=\"%s\", rewrite=\"%s\", fallback=\"%s\"): "

/*
 * Shared back end of the extract() method and function.
 *
 * Reserves the remaining workspace of ctx->ws and hands it to
 * vre2_extract() as the output buffer. Returns the buffer when the
 * pattern matched; returns fallback when vre2_extract() reports an
 * error (which is also logged via VERR) or when there was no match.
 *
 * The workspace reservation is always released before returning: with
 * the length reported by vre2_extract() on success, with 0 otherwise.
 *
 * NOTE(review): the buffer address is the value of WS_Snapshot() taken
 * just before WS_Reserve(); this presumably equals the start of the
 * reserved region -- confirm against the Varnish version in use.
 */
static VCL_STRING
extract(VRT_CTX, vre2 * restrict vre2, VCL_STRING text, VCL_STRING rewrite,
	VCL_STRING fallback)
{
	const char *failure;
	char *buf;
	size_t avail, used;
	int matched = 0;

	buf = WS_Snapshot(ctx->ws);
	avail = WS_Reserve(ctx->ws, 0);

	failure = vre2_extract(vre2, text, rewrite, buf, avail, &matched,
			       &used);
	if (failure != NULL) {
		VERR(ctx, ERR_PREFIX "%s", text, rewrite, fallback, failure);
	}

	/* Error and no-match both give back the whole reservation. */
	if (failure != NULL || !matched) {
		WS_Release(ctx->ws, 0);
		return fallback;
	}

	/* Keep only the bytes that vre2_extract() reported as used. */
	WS_Release(ctx->ws, used);
	return buf;
}

#undef ERR_PREFIX
/* Event function */
int
......@@ -463,6 +492,31 @@ vmod_regex_suball(VRT_CTX, struct vmod_re2_regex *re, VCL_STRING text,
return sub_method(ctx, "suball", 1, re, text, rewrite, fallback);
}
/*
 * Method <obj>.extract(text, rewrite, fallback).
 *
 * Validates the string arguments and delegates to extract() with the
 * object's compiled regex. An undefined fallback is logged and yields
 * the fixed string "**EXTRACT METHOD FAILED**"; an undefined text or
 * rewrite is logged and yields fallback.
 */
VCL_STRING
vmod_regex_extract(VRT_CTX, struct vmod_re2_regex *re, VCL_STRING text,
		   VCL_STRING rewrite, VCL_STRING fallback)
{
	CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
	CHECK_OBJ_NOTNULL(re, VMOD_RE2_REGEX_MAGIC);

	/* fallback is checked first: it is needed by every later message. */
	if (!fallback) {
		VERR(ctx, "%s.extract(): fallback is undefined", re->vcl_name);
		return "**EXTRACT METHOD FAILED**";
	}

	if (text && rewrite) {
		return extract(ctx, re->vre2, text, rewrite, fallback);
	}

	/* Exactly one message is logged; undefined text takes precedence. */
	if (!text) {
		VERR(ctx, "%s.extract(text=<undefined>, fallback=\"%s\"): "
		     "text is undefined", re->vcl_name, fallback);
	} else {
		VERR(ctx, "%s.extract(text=\"%s\", rewrite=<undefined>, "
		     "fallback=\"%s\"): rewrite is undefined", re->vcl_name,
		     text, fallback);
	}
	return fallback;
}
/* Object set */
VCL_VOID
......@@ -832,6 +886,58 @@ vmod_suball(VRT_CTX, VCL_STRING pattern, VCL_STRING text, VCL_STRING rewrite,
perl_classes, word_boundary, one_line);
}
#define ERR_PREFIX "re2.extract(pattern=\"%s\", text=\"%s\", rewrite=\"%s\", fallback=\"%s\"): "

/*
 * Function re2.extract(pattern, text, rewrite, fallback, <options>).
 *
 * Validates the string arguments, compiles pattern with the given
 * options via vre2_init(), runs extract() and then releases the
 * compiled regex with vre2_fini().
 *
 * An undefined fallback is logged and yields the fixed string
 * "**EXTRACT FUNCTION FAILED**". An undefined pattern, text or rewrite,
 * or a pattern that fails to compile, is logged and yields fallback.
 */
VCL_STRING
vmod_extract(VRT_CTX, VCL_STRING pattern, VCL_STRING text, VCL_STRING rewrite,
	     VCL_STRING fallback, VCL_BOOL utf8, VCL_BOOL posix_syntax,
	     VCL_BOOL longest_match, VCL_INT max_mem, VCL_BOOL literal,
	     VCL_BOOL never_nl, VCL_BOOL dot_nl, VCL_BOOL never_capture,
	     VCL_BOOL case_sensitive, VCL_BOOL perl_classes,
	     VCL_BOOL word_boundary, VCL_BOOL one_line)
{
	vre2 *re = NULL;
	const char *compile_err, *extracted;

	CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);

	/* Argument checks, in the order the messages are reported. */
	if (!fallback) {
		ERR(ctx, "re2.extract(): fallback is undefined");
		return "**EXTRACT FUNCTION FAILED**";
	}
	if (!pattern) {
		VERR(ctx, "re2.extract(pattern=<undefined>, fallback=\"%s\"): "
		     "pattern is undefined", fallback);
		return fallback;
	}
	if (!text) {
		VERR(ctx, "re2.extract(pattern=\"%s\", text=<undefined>, "
		     "fallback=\"%s\"): text is undefined", pattern, fallback);
		return fallback;
	}
	if (!rewrite) {
		VERR(ctx, "re2.extract(pattern=\"%s\", text=\"%s\", "
		     "rewrite=<undefined>, fallback=\"%s\"): "
		     "rewrite is undefined", pattern, text, fallback);
		return fallback;
	}

	compile_err = vre2_init(&re, pattern, utf8, posix_syntax,
				longest_match, max_mem, literal, never_nl,
				dot_nl, never_capture, case_sensitive,
				perl_classes, word_boundary, one_line);
	if (compile_err == NULL) {
		extracted = extract(ctx, re, text, rewrite, fallback);
	} else {
		VERR(ctx, ERR_PREFIX "Cannot compile '%s': %s", pattern, text,
		     rewrite, fallback, pattern, compile_err);
		extracted = fallback;
	}

	/* The regex is released on both the success and the failure path. */
	vre2_fini(&re);
	return extracted;
}

#undef ERR_PREFIX
VCL_STRING
vmod_version(const struct vrt_ctx *ctx __attribute__((unused)))
{
......
......@@ -30,6 +30,9 @@ $Method STRING .sub(STRING text, STRING rewrite,
$Method STRING .suball(STRING text, STRING rewrite,
STRING fallback = "**SUBALL METHOD FAILED**")
$Method STRING .extract(STRING text, STRING rewrite,
STRING fallback = "**EXTRACT METHOD FAILED**")
$Object set(ENUM { none, start, both } anchor="none", BOOL utf8=0,
BOOL posix_syntax=0, BOOL longest_match=0, INT max_mem=8388608,
BOOL literal=0, BOOL never_nl=0, BOOL dot_nl=0,
......@@ -70,4 +73,12 @@ $Function STRING suball(STRING pattern, STRING text, STRING rewrite,
BOOL case_sensitive=1, BOOL perl_classes=0,
BOOL word_boundary=0, BOOL one_line=0)
$Function STRING extract(STRING pattern, STRING text, STRING rewrite,
STRING fallback = "**EXTRACT FUNCTION FAILED**",
BOOL utf8=0, BOOL posix_syntax=0, BOOL longest_match=0,
INT max_mem=8388608, BOOL literal=0, BOOL never_nl=0,
BOOL dot_nl=0, BOOL never_capture=0,
BOOL case_sensitive=1, BOOL perl_classes=0,
BOOL word_boundary=0, BOOL one_line=0)
$Function STRING version()
......@@ -92,6 +92,13 @@ vre2::global_replace(string *text, const char * const rewrite) const
return RE2::GlobalReplace(text, *re_, rewrite);
}
/*
 * Forwards to RE2::Extract() using the wrapped regex *re_, with text as
 * the subject, rewrite as the rewrite string and out as the destination
 * string. Returns the result of RE2::Extract().
 */
inline bool
vre2::extract(string *out, const char * const text, const char * const rewrite)
	const
{
	const bool matched = RE2::Extract(text, *re_, rewrite, out);

	return matched;
}
const char *
vre2_init(vre2 **vre2p, const char *pattern, unsigned utf8,
unsigned posix_syntax, unsigned longest_match, long max_mem,
......@@ -198,6 +205,26 @@ vre2_replace(vre2 *vre2, const int all, const char * const text,
CATCHALL
}
/*
 * C-callable wrapper around vre2::extract().
 *
 * Sets *match to the result of the extraction. When there is no match,
 * returns NULL without touching dest or *len. When there is a match, the
 * extracted string is copied to dest followed by a terminating NUL, and
 * *len is set to the number of bytes written, including the terminator
 * (i.e. the string length plus one).
 *
 * dest must have room for bytes bytes; if the result plus its terminator
 * does not fit, a runtime_error("insufficient workspace") is thrown
 * inside the try block.
 *
 * Returns NULL on success.
 * NOTE(review): CATCHALL is defined elsewhere; it presumably catches the
 * exceptions thrown here and returns an error string -- confirm.
 */
const char *
vre2_extract(vre2 *vre2, const char * const text, const char * const rewrite,
	     char * const dest, const size_t bytes, int * const match,
	     size_t * const len)
{
	try {
		string out;

		*match = vre2->extract(&out, text, rewrite);
		if (!*match)
			return NULL;

		const size_t n = out.size();
		if (n + 1 > bytes)
			throw runtime_error("insufficient workspace");

		/*
		 * string::copy() never copies more than size() characters
		 * and does not append a terminator, so the NUL must go at
		 * index n. Writing it at n + 1 would leave dest[n]
		 * uninitialized and could write past the end of dest when
		 * bytes == n + 1.
		 */
		out.copy(dest, n);
		dest[n] = '\0';
		*len = n + 1;
		return NULL;
	}
	CATCHALL
}
const char *
vre2_fini(vre2 **vre2)
{
......
......@@ -50,6 +50,8 @@ public:
int get_group(const char * const name) const;
bool replace(string *text, const char * const rewrite) const;
bool global_replace(string *text, const char * const rewrite) const;
/*
 * Applies the wrapped regex to text and, via RE2::Extract(), stores the
 * rewrite string with its backreferences substituted in *out. Returns the
 * result of RE2::Extract().
 */
bool extract(string *out, const char * const text,
const char * const rewrite) const;
};
#else
typedef struct vre2 vre2;
......@@ -81,6 +83,10 @@ extern "C" {
const char * const rewrite,
char * const dest, const size_t bytes,
int * const match, size_t * const len);
/*
 * Extracts from text with the compiled regex and the rewrite string.
 * Sets *match to non-zero if the regex matched. On a match the result is
 * written to dest (capacity: bytes) and *len is set to the result's length
 * plus one; without a match dest and *len are left untouched.
 * Returns NULL when no error occurred.
 */
const char *vre2_extract(vre2 *vre2, const char * const text,
const char * const rewrite, char * const dest,
const size_t bytes, int * const match,
size_t * const len);
#ifdef __cplusplus
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment