Commit f81d9f81 authored by Geoff Simmons's avatar Geoff Simmons

Add the sub() method.

Still with plenty of repeated code to be factored out.
parent 3703c0b0
Pipeline #248 skipped
......@@ -84,6 +84,15 @@ regex.namedref
STRING regex.namedref(STRING name, STRING fallback="**NAMEDREF METHOD FAILED**")
.. _func_regex.sub:
regex.sub
---------
::
STRING regex.sub(PRIV_CALL, PRIV_TASK, STRING subject, STRING replacement, INT len=0, BOOL anchored=0, INT match_limit=0, INT offset_limit=0, BOOL notbol=0, BOOL noteol=0, BOOL notempty=0, BOOL notempty_atstart=0, BOOL no_jit=0, BOOL no_utf_check=0, INT recursion_limit=0, BOOL suball=0, BOOL sub_extended=0, BOOL unknown_unset=0, BOOL unset_empty=0)
.. _func_match:
match
......
# -*-mode: vcl; coding: raw-text -*-
# Those instructions tell emacs to encode the bytes in this file as
# is; in particular, not to use utf-8 encoding. You might want to set
# up your editor similarly -- tests will fail if the encoding is
# changed.
varnishtest "sub()"
# Tests from PCRE2 testoutput2
# As of Varnish 5.1.2, varnishtest rejects any strings of the form
# ${...} that it doesn't recognize as one of its own macros -- or if
# it does, then it replaces the macro. So we can't use that
# substitution syntax directly in the vtc source. As a workaround,
# include a VCL source that contains the offending ${}'s.
varnish v1 -vcl {
import pcre2 from "${vmod_topbuild}/src/.libs/libvmod_pcre2.so";
backend b { .host = "${bad_ip}"; }
sub vcl_init {
new r1 = pcre2.regex("abc");
new r2 = pcre2.regex("(?<=abc)(|def)");
new r3 = pcre2.regex(".");
new r4 = pcre2.regex("(.)(.)");
new r5 = pcre2.regex("(?<A>.)(?<B>.)");
new r6 = pcre2.regex(
"(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry");
new r7 = pcre2.regex("A", use_offset_limit=true);
new r8 = pcre2.regex("abcd");
new r9 = pcre2.regex("a(bc)(DE)");
new r10 = pcre2.regex("(?J)(?:(?<A>a)|(?<A>b))");
new r11 = pcre2.regex("(a)|(b)");
new r12 = pcre2.regex("(aa)(BB)");
new r13 = pcre2.regex("(?=a\K)");
new r14 = pcre2.regex("a|(b)c");
new r15 = pcre2.regex("a|(?'X'b)c");
}
sub vcl_recv {
return(synth(200));
}
sub vcl_synth {
set resp.http.r1-1 = r1.sub("123123", "XYZ");
set resp.http.r1-2 = r1.sub("123abc123", "XYZ");
set resp.http.r1-3 = r1.sub("123abc123abc123", "XYZ");
set resp.http.r1-4 = r1.sub("123abc123", "XYZ", suball=true);
set resp.http.r1-5
= r1.sub("123abc123abc123", "XYZ", suball=true);
set resp.http.r1-6 = r1.sub("123abc123", "X$$Z");
set resp.http.r1-7
= r1.sub("123abc123abc123", "X$$Z", suball=true);
set resp.http.r2
= r2.sub("123abcxyzabcdef789abcpqr", "<$0>", suball=true);
set resp.http.r3 = r3.sub("a", "$0");
set resp.http.r4-1 = r4.sub("abc", "$2+$1");
set resp.http.r4-2 = r4.sub("abcdefgh", "$2$1", suball=true);
set resp.http.r5 = r5.sub("abc", "$B+$A");
set resp.http.r6-1
= r6.sub("apple lemon blackberry", "<$*MARK>", suball=true);
set resp.http.r6-2
= r6.sub("apple strudel", "<$*MARK>", suball=true);
set resp.http.r6-3
= r6.sub("fruitless", "<$*MARK>", suball=true);
set resp.http.r7
= r7.sub("XAXAXAXAXA", "-", suball=true, offset_limit=4);
set resp.http.r8
= r8.sub("abcd", "w\rx\x82y\o{333}z(\Q12\$34$$\x34\E5$$)",
sub_extended=true);
set resp.http.r9
= r9.sub("abcDE", "a\u$1\U$1\E$1\l$2\L$2\Eab\Uab\LYZ\EDone",
sub_extended=true);
set resp.http.r10-1 = r10.sub("[a]", "<$A>");
set resp.http.r10-2 = r10.sub("[b]", "<$A>");
set resp.http.r12 = r12.sub("aaBB", "\U$1\L$2\E$1..\U$1\l$2$1",
sub_extended=true);
set resp.http.r14-1 = r14.sub("cat", ">$1<", unset_empty=true);
set resp.http.r14-2
= r14.sub("xbcom", ">$1<", unset_empty=true);
set resp.http.r14-3 = r14.sub("cat", ">$2<", unset_empty=true,
unknown_unset=true);
set resp.http.r15-1 = r15.sub("cat", ">$X<", unset_empty=true);
set resp.http.r15-2
= r15.sub("xbcom", ">$X<", unset_empty=true);
set resp.http.r15-3 = r15.sub("cat", ">$Y<", unset_empty=true,
unknown_unset=true);
# failures
set resp.http.r1fail-1 = r1.sub("123abc", "a$++");
set resp.http.r1fail-2 = r1.sub("123abc", "a$bad");
set resp.http.r1fail-3 = r1.sub("abc", "A$3123456789Z");
set resp.http.r8fail
= r8.sub("abcd", "xy\kz", sub_extended=true);
set resp.http.r11fail = r11.sub("b", "<$1>");
set resp.http.r13fail = r13.sub("BaCaD", "z");
set resp.http.r14fail-1
= r14.sub("cat", ">$2<", unset_empty=true);
set resp.http.r14fail-2
= r14.sub("cat", ">$2<", unknown_unset=true);
set resp.http.r15fail-1
= r15.sub("cat", ">$Y<", unset_empty=true);
set resp.http.r15fail-2
= r15.sub("cat", ">$Y<", unknown_unset=true);
return(deliver);
}
} -start
client c1 -repeat 2 {
txreq
rxresp
expect resp.status == "200"
expect resp.http.r1-1 == "123123"
expect resp.http.r1-2 == "123XYZ123"
expect resp.http.r1-3 == "123XYZ123abc123"
expect resp.http.r1-4 == "123XYZ123"
expect resp.http.r1-5 == "123XYZ123XYZ123"
expect resp.http.r1-6 == "123X$Z123"
expect resp.http.r1-7 == "123X$Z123X$Z123"
expect resp.http.r2 == "123abc<>xyzabc<><def>789abc<>pqr"
expect resp.http.r3 == "a"
expect resp.http.r4-1 == "b+ac"
expect resp.http.r4-2 == "badcfehg"
expect resp.http.r6-1 == "<pear> <orange> <strawberry>"
expect resp.http.r6-2 == "<pear> strudel"
expect resp.http.r6-3 == "fruitless"
expect resp.http.r7 == "X-X-XAXAXA"
expect resp.http.r8 == {w xyz(12\$34$$\x345$)}
expect resp.http.r9 == "aBcBCbcdEdeabAByzDone"
expect resp.http.r10-1 == "[<a>]"
expect resp.http.r10-2 == "[<b>]"
expect resp.http.r12 == "AAbbaa..AAbBaa"
expect resp.http.r14-1 == "c><t"
expect resp.http.r14-2 == "x>b<om"
expect resp.http.r14-3 == "c><t"
expect resp.http.r15-1 == "c><t"
expect resp.http.r15-2 == "x>b<om"
expect resp.http.r15-3 == "c><t"
expect resp.http.r1fail-1 == ""
expect resp.http.r1fail-2 == ""
expect resp.http.r1fail-3 == ""
expect resp.http.r8fail == ""
expect resp.http.r11fail == ""
expect resp.http.r13fail == ""
expect resp.http.r14fail-1 == ""
expect resp.http.r14fail-2 == ""
expect resp.http.r15fail-1 == ""
expect resp.http.r15fail-2 == ""
} -run
logexpect l1 -v v1 -d 1 -g vxid -q "VCL_Error" {
expect 0 * Begin req
expect * = VCL_Error "^vmod pcre2 error: in r1.sub..: invalid replacement string$"
expect * = VCL_Error "^vmod pcre2 error: in r1.sub..: unknown substring$"
expect * = VCL_Error "^vmod pcre2 error: in r1.sub..: unknown substring$"
expect * = VCL_Error "^vmod pcre2 error: in r8.sub..: bad escape sequence in replacement string$"
expect * = VCL_Error "^vmod pcre2 error: in r11.sub..: requested value is not set$"
expect * = VCL_Error "^vmod pcre2 error: in r13.sub..: match with end before start is not supported$"
expect * = VCL_Error "^vmod pcre2 error: in r14.sub..: unknown substring"
expect * = VCL_Error "^vmod pcre2 error: in r14.sub..: requested value is not set"
expect * = VCL_Error "^vmod pcre2 error: in r15.sub..: unknown substring"
expect * = VCL_Error "^vmod pcre2 error: in r15.sub..: requested value is not set"
expect * = End
} -run
# Tests that require the ${...} syntax and the include
varnish v1 -vcl {
import pcre2 from "${vmod_topbuild}/src/.libs/libvmod_pcre2.so";
backend b { .host = "${bad_ip}"; }
sub vcl_init {
new r1 = pcre2.regex("a(b)c(d)e");
new r2 = pcre2.regex("a(?<ONE>b)c(?<TWO>d)e");
new r3 = pcre2.regex(
"(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry");
new r4 = pcre2.regex("abc");
new r5 = pcre2.regex("a(?:(b)|(c))");
new r6 = pcre2.regex("(a)");
new r7 = pcre2.regex("X(b)Y");
new r8 = pcre2.regex("(a)");
new r9 = pcre2.regex("(abcd)");
new r10 = pcre2.regex("abcd");
new r11 = pcre2.regex("a|(b)c");
}
sub vcl_recv {
return(synth(200));
}
# include vcl_synth
include "${vmod_topbuild}/src/tests/sub_macros_synth.vcl";
}
logexpect l1 -v v1 -d 0 -g vxid -q "VCL_Error" {
expect 0 * Begin req
expect * = VCL_Error "^vmod pcre2 error: in r4.sub..: unknown substring$"
expect * = VCL_Error "^vmod pcre2 error: in r4.sub..: invalid replacement string$"
expect * = VCL_Error "^vmod pcre2 error: in r4.sub..: expected closing curly bracket in replacement string$"
expect * = VCL_Error "^vmod pcre2 error: in r4.sub..: expected closing curly bracket in replacement string$"
expect * = VCL_Error "^vmod pcre2 error: in r8.sub..: expected closing curly bracket in replacement string$"
expect * = VCL_Error "^vmod pcre2 error: in r9.sub..: bad escape sequence in replacement string$"
expect * = VCL_Error "^vmod pcre2 error: in r10.sub..: unknown substring$"
expect * = VCL_Error "^vmod pcre2 error: in r11.sub..: unknown substring$"
expect * = End
} -start
client c1 -repeat 2 {
txreq
rxresp
expect resp.status == "200"
expect resp.http.r1-1 == {"XbYdZ"}
expect resp.http.r1-2 == {"XbYdZ-XbYdZ"}
expect resp.http.r2-1 == {"Xb+dZ"}
expect resp.http.r2-2 == {"Xb+dZ-Xb+dZ-"}
expect resp.http.r3-1 == "pear orange strawberry"
expect resp.http.r3-2 == "pear strudel"
expect resp.http.r3-3 == "fruitless"
expect resp.http.r3-4 == "pear sauce lemon blackberry"
expect resp.http.r4-1 == ""
expect resp.http.r4-2 == ""
expect resp.http.r4-3 == ""
expect resp.http.r4-4 == ""
expect resp.http.r5-1 == "X1X-2"
expect resp.http.r5-2 == "X-1X2"
expect resp.http.r5-3 == "b:b"
expect resp.http.r5-4 == "c"
expect resp.http.r5-5 == "XbX2:-2"
expect resp.http.r5-6 == "X1:-1Xc"
expect resp.http.r6 == ">$1:{}$$+A<"
expect resp.http.r7-1 == "xbBY"
expect resp.http.r7-2 == "XBBY"
expect resp.http.r8 == ""
expect resp.http.r9 == ""
expect resp.http.r10-1 == ""
expect resp.http.r10-2 == ""
expect resp.http.r11-1 == ""
expect resp.http.r11-2 == "c>xx<t"
expect resp.http.r11-3 == "c>xx<t"
} -run
logexpect l1 -wait
# Test replacements in multiline mode. This doesn't work with headers,
# because Varnish ends headers at CRLF, so we write the replacements
# into synthetic response bodies. The code containing CRs has to be
# included from a separate file, since vcl.inline, called during
# varnishtest, replaces CRs with "\r" (backslash followed by 'r').
varnish v1 -vcl {
import pcre2 from "${vmod_topbuild}/src/.libs/libvmod_pcre2.so";
backend b { .host = "${bad_ip}"; }
sub vcl_init {
new r1 = pcre2.regex("^$", multiline=true, newline=ANYCRLF);
new r2 = pcre2.regex("^$", multiline=true, newline=CRLF);
new r3 = pcre2.regex("^$", multiline=true, newline=ANY);
new r4 = pcre2.regex("(*ANYCRLF)(?m)^(.*[^0-9\r\n].*|)$");
}
sub vcl_recv {
return(synth(200));
}
# include vcl_synth
include "${vmod_topbuild}/src/tests/sub_multiline_synth.vcl";
}
client c1 -repeat 2 {
txreq -hdr "Test: 1"
rxresp
expect resp.status == "200"
expect resp.body ~ "^X\x0d\x0a-\x0d\x0aY$"
txreq -hdr "Test: 2"
rxresp
expect resp.status == "200"
expect resp.body ~ "^X\x0d\x0a-\x0d\x0aY$"
txreq -hdr "Test: 3"
rxresp
expect resp.status == "200"
expect resp.body ~ "^X\x0d\x0a-\x0d\x0aY$"
txreq -hdr "Test: 4"
rxresp
expect resp.status == "200"
expect resp.body ~ "^15\x0d\x0aNaN\x0d\x0a20\x0d\x0aNaN\x0d\x0aNaN\x0d\x0aNaN\x0d\x0a20$"
} -run
# vcl_synth for a test in sub.vtc that requires use of the ${...}
# substitution syntax. These cannot be used directly in a vtc, since
# varnishtest takes them for unknown macros and rejects the test.
sub vcl_synth {
# Every header set here is checked by the client expectations in
# sub.vtc. The cases marked as failures are expected there to yield an
# empty header and a VCL_Error log entry.

# r1: numbered group references, written both as $n and as ${n}.
set resp.http.r1-1 = r1.sub({""abcde""}, "X$1Y${2}Z");
set resp.http.r1-2
= r1.sub({""abcde-abcde""}, "X$1Y${2}Z", suball=true);
# r2: named group references, written both as $name and as ${name}.
set resp.http.r2-1 = r2.sub({""abcde""}, "X$ONE+${TWO}Z");
set resp.http.r2-2
= r2.sub({""abcde-abcde-""}, "X$ONE+${TWO}Z", suball=true);
# r3: ${*MARK} is replaced by the name of the (*MARK) passed in the
# match. r3-4 omits suball, so only the first match is replaced.
set resp.http.r3-1
= r3.sub("apple lemon blackberry", "${*MARK}", suball=true);
set resp.http.r3-2 = r3.sub("apple strudel", "${*MARK}", suball=true);
set resp.http.r3-3 = r3.sub("fruitless", "${*MARK}", suball=true);
set resp.http.r3-4 = r3.sub("apple lemon blackberry", "${*MARK} sauce");
# r4: failures. The group names in r4-1 and r4-2 are 32 and 33
# characters long, respectively; r4-3 lacks the closing bracket and
# r4-4 has a '+' inside the brackets.
set resp.http.r4-1
= r4.sub("123abc", "a${A234567890123456789_123456789012}z");
set resp.http.r4-2
= r4.sub("123abc", "a${A23456789012345678901234567890123}z");
set resp.http.r4-3 = r4.sub("123abc", "a${bcd");
set resp.http.r4-4 = r4.sub("123abc", "a${b+d}z");
# r5: the sub_extended conditional forms ${n:+set:unset} and
# ${n:-default}, applied to a group that is set and one that is unset.
set resp.http.r5-1
= r5.sub("ab", "X${1:+1:-1}X${2:+2:-2}", sub_extended=true);
set resp.http.r5-2
= r5.sub("ac", "X${1:+1:-1}X${2:+2:-2}", sub_extended=true);
set resp.http.r5-3 = r5.sub("ab", "${1:+$1\:$1:$2}", sub_extended=true);
set resp.http.r5-4 = r5.sub("ac", "${1:+$1\:$1:$2}", sub_extended=true);
set resp.http.r5-5
= r5.sub("ab", "X${1:-1:-1}X${2:-2:-2}", sub_extended=true);
set resp.http.r5-6
= r5.sub("ac", "X${1:-1:-1}X${2:-2:-2}", sub_extended=true);
# r6: \Q...\E literal quoting and \U case forcing inside the "set"
# branch of a conditional.
set resp.http.r6
= r6.sub("a", ">${1:+\Q$1:{}$$\E+\U$1}<", sub_extended=true);
# r7: \U started inside or before a conditional; the expected results
# show it staying in effect for the text after the conditional.
set resp.http.r7-1 = r7.sub("XbY", "x${1:+$1\U$1}y", sub_extended=true);
set resp.http.r7-2 = r7.sub("XbY", "\Ux${1:+$1$1}y", sub_extended=true);
# r8, r9, r10: failures with sub_extended -- a conditional on *MARK, a
# bad escape (\k) inside a conditional, and references to groups that
# the pattern does not define.
set resp.http.r8 = r8.sub("a", "${*MARK:+a:b}", sub_extended=true);
set resp.http.r9 = r9.sub("abcd", "${1:+xy\kz}", sub_extended=true);
set resp.http.r10-1 = r10.sub("abcd", ">$1<", sub_extended=true);
set resp.http.r10-2
= r10.sub("abcd", ">xxx${xyz}<<<", sub_extended=true);
# r11: a default for a group that the pattern does not define fails
# without unknown_unset (r11-1); with unknown_unset the default is
# used, for both an unknown number (r11-2) and an unknown name (r11-3).
set resp.http.r11-1 = r11.sub("cat", ">${2:-xx}<", sub_extended=true);
set resp.http.r11-2 = r11.sub("cat", ">${2:-xx}<", sub_extended=true,
unknown_unset=true);
set resp.http.r11-3 = r11.sub("cat", ">${X:-xx}<", sub_extended=true,
unknown_unset=true);
}
# vcl_synth for a test in sub.vtc that requires CRs in the subject
# string, because vcl.inline, called during varnishtest, replaces them
# with "\r" (backslash followed by 'r').
sub vcl_synth {
if (req.http.test == "1") {
set resp.body
= r1.sub(suball=true, replacement="-", subject={"X
Y"});
}
elsif (req.http.test == "2") {
set resp.body
= r2.sub(suball=true, replacement="-", subject={"X
Y"});
}
elsif (req.http.test == "3") {
set resp.body
= r3.sub(suball=true, replacement="-", subject={"X
Y"});
}
elsif (req.http.test == "4") {
set resp.body
= r4.sub(suball=true, replacement="NaN", subject={"15
foo
20
bar
baz
20"});
}
else {
set resp.status = 500;
}
return(deliver);
}
......@@ -111,7 +111,7 @@
VCL_INT offset_limit, VCL_BOOL notbol, VCL_BOOL noteol, \
VCL_BOOL notempty, VCL_BOOL notempty_atstart, \
VCL_BOOL no_jit, VCL_BOOL no_utf_check, \
VCL_INT recursion_limit \
VCL_INT recursion_limit
/* Doesn't repeat the anchored and no_utf_check options */
#define MATCHF_OPTS \
......@@ -135,6 +135,10 @@
anchored, notbol, noteol, notempty, notempty_atstart, no_jit, \
no_utf_check
/*
 * Parameters specific to the sub() method. vmod_regex_sub() translates
 * them to the PCRE2_SUBSTITUTE_GLOBAL, _EXTENDED, _UNKNOWN_UNSET and
 * _UNSET_EMPTY options, respectively.
 */
#define SUB_OPTS \
VCL_BOOL suball, VCL_BOOL sub_extended, VCL_BOOL unknown_unset, \
VCL_BOOL unset_empty
struct vmod_pcre2_regex {
unsigned magic;
#define VMOD_PCRE2_REGEX_MAGIC 0x3adb2a78
......@@ -807,6 +811,109 @@ vmod_regex_namedref(VRT_CTX, struct vmod_pcre2_regex *regex, VCL_STRING name,
regex->vcl_name, "namedref", "**NAMEDREF METHOD FAILED**");
}
/*
 * Implementation of the regex.sub() method: apply pcre2_substitute() to
 * subject with the object's compiled pattern and the given replacement
 * string.
 *
 * The match context and the option bits derived from MATCH_OPTS and
 * SUB_OPTS are computed on the first invocation and cached in
 * priv_call; later invocations through the same priv_call reuse the
 * cached values and do not re-read those parameters.
 *
 * subject:     string to search; NULL is treated as the empty string.
 * replacement: replacement string; NULL is an error.
 * len:         number of bytes of subject to consider; 0 means that
 *              subject is used up to its terminating NUL. Negative
 *              values are rejected. A len greater than the length of
 *              subject is NOT detected here.
 *
 * Returns the result of the substitution, written to workspace, if
 * there was at least one match. If there was no match, returns subject
 * itself when len is 0, or a workspace copy of its first len bytes
 * otherwise. Returns NULL after reporting an error if anything fails.
 */
VCL_STRING
vmod_regex_sub(VRT_CTX, struct vmod_pcre2_regex *regex,
	       struct vmod_priv *priv_call, struct vmod_priv *priv_task,
	       VCL_STRING subject, VCL_STRING replacement, MATCH_OPTS,
	       SUB_OPTS)
{
	pcre2_match_data *mdata;
	struct task *match_task = NULL;
	struct match_call *match_opts;
	int ret;
	PCRE2_SIZE bytes, subject_len;
	PCRE2_UCHAR *buf;
	char *msg;
	uintptr_t snap;

	CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
	CHECK_OBJ_NOTNULL(regex, VMOD_PCRE2_REGEX_MAGIC);
	AN(priv_task);
	AN(priv_call);

	if (replacement == NULL) {
		VERR(ctx, "replacement is undefined in %s.sub()",
		     regex->vcl_name);
		return NULL;
	}
	/*
	 * A negative length would be converted to a huge unsigned value
	 * when handed to pcre2, which would then read far beyond the
	 * subject.
	 */
	if (len < 0) {
		VERR(ctx, "len is negative in %s.sub()", regex->vcl_name);
		return NULL;
	}
	if (subject == NULL) {
		/* Any length given for an undefined subject is meaningless. */
		subject = "";
		len = 0;
	}

	if (priv_call->priv == NULL) {
		pcre2_match_context *mctx;

		/* First invocation: build and cache the context and options. */
		if ((mctx = get_match_context(ctx, MATCH_CTX_PARAMS,
					      regex->vcl_name, ".sub()"))
		    == NULL)
			return NULL;
		ALLOC_OBJ(match_opts, VMOD_PCRE2_MATCH_CALL_MAGIC);
		AN(match_opts);
		match_opts->mctx = mctx;
		priv_call->priv = match_opts;
		priv_call->free = match_call_free;
		set_match_flags(&match_opts->match_options, MATCH_FLAGS_PARAMS);
		set_opt(&match_opts->match_options, suball,
			PCRE2_SUBSTITUTE_GLOBAL);
		set_opt(&match_opts->match_options, sub_extended,
			PCRE2_SUBSTITUTE_EXTENDED);
		set_opt(&match_opts->match_options, unknown_unset,
			PCRE2_SUBSTITUTE_UNKNOWN_UNSET);
		set_opt(&match_opts->match_options, unset_empty,
			PCRE2_SUBSTITUTE_UNSET_EMPTY);
	}
	else {
		CAST_OBJ(match_opts, priv_call->priv,
			 VMOD_PCRE2_MATCH_CALL_MAGIC);
	}

	/* XXX mdata in PRIV_CALL? */
	if ((match_task = get_task(ctx, priv_task, regex->vcl_name, ".sub()"))
	    == NULL)
		return NULL;
	/*
	 * NOTE(review): mdata is never passed to pcre2_match_data_free()
	 * in this function. Presumably the allocator behind
	 * match_task->gctx makes that unnecessary -- confirm against
	 * get_task().
	 */
	mdata = pcre2_match_data_create_from_pattern(regex->code,
						     match_task->gctx);
	if (mdata == NULL) {
		VERRNOMEM(ctx, "initializing match data in %s.sub()",
			  regex->vcl_name);
		return NULL;
	}

	/*
	 * Keep the pcre2 sentinel in an unsigned PCRE2_SIZE rather than
	 * storing it in the signed VCL_INT parameter.
	 */
	subject_len = (len == 0) ? PCRE2_ZERO_TERMINATED : (PCRE2_SIZE)len;

	/*
	 * Don't need to ensure that the subject is in workspace, as we do
	 * with matches, because we won't be retrieving backrefs, and we
	 * give pcre2 the rest of the workspace to write the substitution.
	 */
	buf = (PCRE2_UCHAR *) WS_Front(ctx->ws);
	bytes = (PCRE2_SIZE) WS_Reserve(ctx->ws, 0);

	/* XXX param for start_offset */
	ret = pcre2_substitute(regex->code, (PCRE2_SPTR)subject, subject_len,
			       0, match_opts->match_options, mdata,
			       match_opts->mctx, (PCRE2_SPTR)replacement,
			       PCRE2_ZERO_TERMINATED, buf, &bytes);

	/*
	 * On success, bytes is the length of the result without the
	 * terminating NUL, hence the + 1. With no match, pcre2 has still
	 * copied the (length-limited) subject to the buffer; return that
	 * copy if a length was given, so that the result never extends
	 * beyond the first len bytes of subject.
	 */
	if (ret > 0 || (ret == 0 && len != 0)) {
		WS_Release(ctx->ws, bytes + 1);
		return (VCL_STRING)buf;
	}

	WS_Release(ctx->ws, 0);
	if (ret == 0)
		/* No match, whole subject: no need to spend workspace. */
		return subject;

	if (ret == PCRE2_ERROR_NOMEMORY) {
		VERRNOMEM(ctx, "allocating substitution result in %s.sub()",
			  regex->vcl_name);
		return NULL;
	}

	/* The message prefix is only needed temporarily; give it back. */
	snap = WS_Snapshot(ctx->ws);
	if ((msg = WS_Printf(ctx->ws, "in %s.sub()", regex->vcl_name)) == NULL)
		msg = "";
	report_pcre2_err(ctx, ret, msg, "");
	WS_Reset(ctx->ws, snap);
	return NULL;
}
/* Functional interface */
VCL_BOOL
......
......@@ -43,6 +43,14 @@ $Method STRING .backref(INT ref, STRING fallback = "**BACKREF METHOD FAILED**")
$Method STRING .namedref(STRING name,
STRING fallback = "**NAMEDREF METHOD FAILED**")
$Method STRING .sub(PRIV_CALL, PRIV_TASK, STRING subject, STRING replacement,
INT len=0, BOOL anchored=0, INT match_limit=0,
INT offset_limit=0, BOOL notbol=0, BOOL noteol=0,
BOOL notempty=0, BOOL notempty_atstart=0, BOOL no_jit=0,
BOOL no_utf_check=0, INT recursion_limit=0, BOOL suball=0,
BOOL sub_extended=0, BOOL unknown_unset=0,
BOOL unset_empty=0)
$Function BOOL match(PRIV_CALL, PRIV_TASK, STRING pattern, STRING subject,
BOOL allow_empty_class=0, BOOL anchored=0,
ENUM {ANYCRLF, UNICODE} bsr=0, BOOL alt_bsux=0,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment