...
 
Commits (2)
......@@ -41,6 +41,7 @@ CONTENTS
* BOOL match(PRIV_CALL, PRIV_TASK, STRING, STRING, BOOL, BOOL, ENUM {ANYCRLF,UNICODE}, BOOL, BOOL, BOOL, BOOL, BOOL, BOOL, BOOL, BOOL, BOOL, STRING, BOOL, INT, BOOL, BOOL, BOOL, BOOL, ENUM {CR,LF,CRLF,ANYCRLF,ANY}, BOOL, BOOL, BOOL, BOOL, BOOL, INT, BOOL, BOOL, BOOL, BOOL, INT, INT, INT, BOOL, BOOL, BOOL, BOOL, BOOL, INT)
* STRING backref(PRIV_TASK, INT, STRING)
* STRING namedref(PRIV_TASK, STRING, STRING)
* STRING sub(PRIV_CALL, PRIV_TASK, STRING, STRING, STRING, BOOL, BOOL, ENUM {ANYCRLF,UNICODE}, BOOL, BOOL, BOOL, BOOL, BOOL, BOOL, BOOL, BOOL, BOOL, STRING, BOOL, INT, BOOL, BOOL, BOOL, BOOL, ENUM {CR,LF,CRLF,ANYCRLF,ANY}, BOOL, BOOL, BOOL, BOOL, BOOL, INT, BOOL, BOOL, BOOL, BOOL, INT, INT, INT, BOOL, BOOL, BOOL, BOOL, BOOL, INT, BOOL, BOOL, BOOL, BOOL)
* BOOL config_bool(ENUM {JIT,STACKRECURSE,UNICODE})
* STRING config_str(ENUM {BSR,JITTARGET,NEWLINE,UNICODE_VERSION,VERSION})
* INT config_int(ENUM {LINKSIZE,MATCHLIMIT,PARENSLIMIT,RECURSIONLIMIT})
......@@ -120,6 +121,15 @@ namedref
STRING namedref(PRIV_TASK, STRING name, STRING fallback="**NAMEDREF FUNCTION FAILED**")
.. _func_sub:
sub
---
::
STRING sub(PRIV_CALL, PRIV_TASK, STRING pattern, STRING subject, STRING replacement, BOOL allow_empty_class=0, BOOL anchored=0, ENUM {ANYCRLF,UNICODE} bsr=0, BOOL alt_bsux=0, BOOL alt_circumflex=0, BOOL alt_verbnames=0, BOOL caseless=0, BOOL dollar_endonly=0, BOOL dotall=0, BOOL dupnames=0, BOOL extended=0, BOOL firstline=0, STRING locale=0, BOOL match_unset_backref=0, INT max_pattern_len=0, BOOL multiline=0, BOOL never_backslash_c=0, BOOL never_ucp=0, BOOL never_utf=0, ENUM {CR,LF,CRLF,ANYCRLF,ANY} newline=0, BOOL no_auto_capture=0, BOOL no_auto_possess=0, BOOL no_dotstar_anchor=0, BOOL no_start_optimize=0, BOOL no_utf_check=0, INT parens_nest_limit=0, BOOL ucp=0, BOOL ungreedy=0, BOOL use_offset_limit=0, BOOL utf=0, INT len=0, INT match_limit=0, INT offset_limit=0, BOOL notbol=0, BOOL noteol=0, BOOL notempty=0, BOOL notempty_atstart=0, BOOL no_jit=0, INT recursion_limit=0, BOOL suball=0, BOOL sub_extended=0, BOOL unknown_unset=0, BOOL unset_empty=0)
.. _func_config_bool:
config_bool
......
......@@ -123,6 +123,7 @@ client c1 -repeat 2 {
expect resp.http.r3 == "a"
expect resp.http.r4-1 == "b+ac"
expect resp.http.r4-2 == "badcfehg"
expect resp.http.r5 == "b+ac"
expect resp.http.r6-1 == "<pear> <orange> <strawberry>"
expect resp.http.r6-2 == "<pear> strudel"
expect resp.http.r6-3 == "fruitless"
......@@ -285,3 +286,251 @@ client c1 -repeat 2 {
expect resp.status == "200"
expect resp.body ~ "^15\x0d\x0aNaN\x0d\x0a20\x0d\x0aNaN\x0d\x0aNaN\x0d\x0aNaN\x0d\x0a20$"
} -run
# The same tests again with the sub() function
varnish v1 -vcl {
import pcre2 from "${vmod_topbuild}/src/.libs/libvmod_pcre2.so";
backend b { .host = "${bad_ip}"; }
sub vcl_recv {
return(synth(200));
}
sub vcl_synth {
set resp.http.r1-1 = pcre2.sub("abc", "123123", "XYZ");
set resp.http.r1-2 = pcre2.sub("abc", "123abc123", "XYZ");
set resp.http.r1-3 = pcre2.sub("abc", "123abc123abc123", "XYZ");
set resp.http.r1-4
= pcre2.sub("abc", "123abc123", "XYZ", suball=true);
set resp.http.r1-5
= pcre2.sub("abc", "123abc123abc123", "XYZ", suball=true);
set resp.http.r1-6 = pcre2.sub("abc", "123abc123", "X$$Z");
set resp.http.r1-7
= pcre2.sub("abc", "123abc123abc123", "X$$Z", suball=true);
set resp.http.r2
= pcre2.sub("(?<=abc)(|def)", "123abcxyzabcdef789abcpqr",
"<$0>", suball=true);
set resp.http.r3 = pcre2.sub(".", "a", "$0");
set resp.http.r4-1 = pcre2.sub("(.)(.)", "abc", "$2+$1");
set resp.http.r4-2
= pcre2.sub("(.)(.)", "abcdefgh", "$2$1", suball=true);
set resp.http.r5 = pcre2.sub("(?<A>.)(?<B>.)", "abc", "$B+$A");
set resp.http.r6-1 = pcre2.sub(
"(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry",
"apple lemon blackberry", "<$*MARK>", suball=true);
set resp.http.r6-2 = pcre2.sub(
"(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry",
"apple strudel", "<$*MARK>", suball=true);
set resp.http.r6-3 = pcre2.sub(
"(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry",
"fruitless", "<$*MARK>", suball=true);
set resp.http.r7
= pcre2.sub("A", "XAXAXAXAXA", "-", suball=true,
use_offset_limit=true, offset_limit=4);
set resp.http.r8
= pcre2.sub("abcd", "abcd",
"w\rx\x82y\o{333}z(\Q12\$34$$\x34\E5$$)",
sub_extended=true);
set resp.http.r9
= pcre2.sub("a(bc)(DE)", "abcDE",
"a\u$1\U$1\E$1\l$2\L$2\Eab\Uab\LYZ\EDone",
sub_extended=true);
set resp.http.r10-1
= pcre2.sub("(?J)(?:(?<A>a)|(?<A>b))", "[a]", "<$A>");
set resp.http.r10-2
= pcre2.sub("(?J)(?:(?<A>a)|(?<A>b))", "[b]", "<$A>");
set resp.http.r12
= pcre2.sub("(aa)(BB)", "aaBB", "\U$1\L$2\E$1..\U$1\l$2$1",
sub_extended=true);
set resp.http.r14-1
= pcre2.sub("a|(b)c", "cat", ">$1<", unset_empty=true);
set resp.http.r14-2
= pcre2.sub("a|(b)c", "xbcom", ">$1<", unset_empty=true);
set resp.http.r14-3
= pcre2.sub("a|(b)c", "cat", ">$2<", unset_empty=true,
unknown_unset=true);
set resp.http.r15-1
= pcre2.sub("a|(?'X'b)c", "cat", ">$X<", unset_empty=true);
set resp.http.r15-2
= pcre2.sub("a|(?'X'b)c", "xbcom", ">$X<",
unset_empty=true);
set resp.http.r15-3
= pcre2.sub("a|(?'X'b)c", "cat", ">$Y<", unset_empty=true,
unknown_unset=true);
# failures
set resp.http.r1fail-1 = pcre2.sub("abc", "123abc", "a$++");
set resp.http.r1fail-2 = pcre2.sub("abc", "123abc", "a$bad");
set resp.http.r1fail-3
= pcre2.sub("abc", "abc", "A$3123456789Z");
set resp.http.r8fail
= pcre2.sub("abcd", "abcd", "xy\kz", sub_extended=true);
set resp.http.r11fail = pcre2.sub("(a)|(b)", "b", "<$1>");
set resp.http.r13fail = pcre2.sub("(?=a\K)", "BaCaD", "z");
set resp.http.r14fail-1
= pcre2.sub("a|(b)c", "cat", ">$2<", unset_empty=true);
set resp.http.r14fail-2
= pcre2.sub("a|(b)c", "cat", ">$2<", unknown_unset=true);
set resp.http.r15fail-1
= pcre2.sub("a|(?'X'b)c", "cat", ">$Y<", unset_empty=true);
set resp.http.r15fail-2
= pcre2.sub("a|(?'X'b)c", "cat", ">$Y<",
unknown_unset=true);
return(deliver);
}
}
# One VCL_Error is expected for each failing pcre2.sub() call in vcl_synth,
# in the order the calls are made. Every pattern is anchored at both ends
# so that a message with unexpected trailing text fails the test.
logexpect l1 -v v1 -d 0 -g vxid -q "VCL_Error" {
	expect 0 * Begin req
	expect * = VCL_Error "^vmod pcre2 error: in pcre2.sub..: invalid replacement string$"
	expect * = VCL_Error "^vmod pcre2 error: in pcre2.sub..: unknown substring$"
	expect * = VCL_Error "^vmod pcre2 error: in pcre2.sub..: unknown substring$"
	expect * = VCL_Error "^vmod pcre2 error: in pcre2.sub..: bad escape sequence in replacement string$"
	expect * = VCL_Error "^vmod pcre2 error: in pcre2.sub..: requested value is not set$"
	expect * = VCL_Error "^vmod pcre2 error: in pcre2.sub..: match with end before start is not supported$"
	expect * = VCL_Error "^vmod pcre2 error: in pcre2.sub..: unknown substring$"
	expect * = VCL_Error "^vmod pcre2 error: in pcre2.sub..: requested value is not set$"
	expect * = VCL_Error "^vmod pcre2 error: in pcre2.sub..: unknown substring$"
	expect * = VCL_Error "^vmod pcre2 error: in pcre2.sub..: requested value is not set$"
	expect * = End
} -start
client c1 -repeat 2 {
txreq
rxresp
expect resp.status == "200"
expect resp.http.r1-1 == "123123"
expect resp.http.r1-2 == "123XYZ123"
expect resp.http.r1-3 == "123XYZ123abc123"
expect resp.http.r1-4 == "123XYZ123"
expect resp.http.r1-5 == "123XYZ123XYZ123"
expect resp.http.r1-6 == "123X$Z123"
expect resp.http.r1-7 == "123X$Z123X$Z123"
expect resp.http.r2 == "123abc<>xyzabc<><def>789abc<>pqr"
expect resp.http.r3 == "a"
expect resp.http.r4-1 == "b+ac"
expect resp.http.r4-2 == "badcfehg"
expect resp.http.r5 == "b+ac"
expect resp.http.r6-1 == "<pear> <orange> <strawberry>"
expect resp.http.r6-2 == "<pear> strudel"
expect resp.http.r6-3 == "fruitless"
expect resp.http.r7 == "X-X-XAXAXA"
expect resp.http.r8 == {w xyz(12\$34$$\x345$)}
expect resp.http.r9 == "aBcBCbcdEdeabAByzDone"
expect resp.http.r10-1 == "[<a>]"
expect resp.http.r10-2 == "[<b>]"
expect resp.http.r12 == "AAbbaa..AAbBaa"
expect resp.http.r14-1 == "c><t"
expect resp.http.r14-2 == "x>b<om"
expect resp.http.r14-3 == "c><t"
expect resp.http.r15-1 == "c><t"
expect resp.http.r15-2 == "x>b<om"
expect resp.http.r15-3 == "c><t"
expect resp.http.r1fail-1 == ""
expect resp.http.r1fail-2 == ""
expect resp.http.r1fail-3 == ""
expect resp.http.r8fail == ""
expect resp.http.r11fail == ""
expect resp.http.r13fail == ""
expect resp.http.r14fail-1 == ""
expect resp.http.r14fail-2 == ""
expect resp.http.r15fail-1 == ""
expect resp.http.r15fail-2 == ""
} -run
logexpect l1 -wait
varnish v1 -vcl {
import pcre2 from "${vmod_topbuild}/src/.libs/libvmod_pcre2.so";
backend b { .host = "${bad_ip}"; }
sub vcl_recv {
return(synth(200));
}
# include vcl_synth
include "${vmod_topbuild}/src/tests/sub_func_macros_synth.vcl";
}
logexpect l1 -v v1 -d 0 -g vxid -q "VCL_Error" {
expect 0 * Begin req
expect * = VCL_Error "^vmod pcre2 error: in pcre2.sub..: unknown substring$"
expect * = VCL_Error "^vmod pcre2 error: in pcre2.sub..: invalid replacement string$"
expect * = VCL_Error "^vmod pcre2 error: in pcre2.sub..: expected closing curly bracket in replacement string$"
expect * = VCL_Error "^vmod pcre2 error: in pcre2.sub..: expected closing curly bracket in replacement string$"
expect * = VCL_Error "^vmod pcre2 error: in pcre2.sub..: expected closing curly bracket in replacement string$"
expect * = VCL_Error "^vmod pcre2 error: in pcre2.sub..: bad escape sequence in replacement string$"
expect * = VCL_Error "^vmod pcre2 error: in pcre2.sub..: unknown substring$"
expect * = VCL_Error "^vmod pcre2 error: in pcre2.sub..: unknown substring$"
expect * = End
} -start
client c1 -repeat 2 {
txreq
rxresp
expect resp.status == "200"
expect resp.http.r1-1 == {"XbYdZ"}
expect resp.http.r1-2 == {"XbYdZ-XbYdZ"}
expect resp.http.r2-1 == {"Xb+dZ"}
expect resp.http.r2-2 == {"Xb+dZ-Xb+dZ-"}
expect resp.http.r3-1 == "pear orange strawberry"
expect resp.http.r3-2 == "pear strudel"
expect resp.http.r3-3 == "fruitless"
expect resp.http.r3-4 == "pear sauce lemon blackberry"
expect resp.http.r4-1 == ""
expect resp.http.r4-2 == ""
expect resp.http.r4-3 == ""
expect resp.http.r4-4 == ""
expect resp.http.r5-1 == "X1X-2"
expect resp.http.r5-2 == "X-1X2"
expect resp.http.r5-3 == "b:b"
expect resp.http.r5-4 == "c"
expect resp.http.r5-5 == "XbX2:-2"
expect resp.http.r5-6 == "X1:-1Xc"
expect resp.http.r6 == ">$1:{}$$+A<"
expect resp.http.r7-1 == "xbBY"
expect resp.http.r7-2 == "XBBY"
expect resp.http.r8 == ""
expect resp.http.r9 == ""
expect resp.http.r10-1 == ""
expect resp.http.r10-2 == ""
expect resp.http.r11-1 == ""
expect resp.http.r11-2 == "c>xx<t"
expect resp.http.r11-3 == "c>xx<t"
} -run
logexpect l1 -wait
varnish v1 -vcl {
import pcre2 from "${vmod_topbuild}/src/.libs/libvmod_pcre2.so";
backend b { .host = "${bad_ip}"; }
sub vcl_recv {
return(synth(200));
}
# include vcl_synth
include "${vmod_topbuild}/src/tests/sub_func_multiline_synth.vcl";
}
client c1 -repeat 2 {
txreq -hdr "Test: 1"
rxresp
expect resp.status == "200"
expect resp.body ~ "^X\x0d\x0a-\x0d\x0aY$"
txreq -hdr "Test: 2"
rxresp
expect resp.status == "200"
expect resp.body ~ "^X\x0d\x0a-\x0d\x0aY$"
txreq -hdr "Test: 3"
rxresp
expect resp.status == "200"
expect resp.body ~ "^X\x0d\x0a-\x0d\x0aY$"
txreq -hdr "Test: 4"
rxresp
expect resp.status == "200"
expect resp.body ~ "^15\x0d\x0aNaN\x0d\x0a20\x0d\x0aNaN\x0d\x0aNaN\x0d\x0aNaN\x0d\x0a20$"
} -run
......
# vcl_synth for a test in sub.vtc that requires use of the ${...}
# substitution syntax. These cannot be used directly in a vtc, since
# varnishtest takes them for unknown macros and rejects the test.
sub vcl_synth {
set resp.http.r1-1 = pcre2.sub("a(b)c(d)e", {""abcde""}, "X$1Y${2}Z");
set resp.http.r1-2
= pcre2.sub("a(b)c(d)e", {""abcde-abcde""}, "X$1Y${2}Z",
suball=true);
set resp.http.r2-1
= pcre2.sub("a(?<ONE>b)c(?<TWO>d)e", {""abcde""}, "X$ONE+${TWO}Z");
set resp.http.r2-2
= pcre2.sub("a(?<ONE>b)c(?<TWO>d)e", {""abcde-abcde-""},
"X$ONE+${TWO}Z", suball=true);
set resp.http.r3-1 = pcre2.sub(
"(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry",
"apple lemon blackberry", "${*MARK}", suball=true);
set resp.http.r3-2 = pcre2.sub(
"(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry",
"apple strudel", "${*MARK}", suball=true);
set resp.http.r3-3 = pcre2.sub(
"(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry",
"fruitless", "${*MARK}", suball=true);
set resp.http.r3-4 = pcre2.sub(
"(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry",
"apple lemon blackberry", "${*MARK} sauce");
set resp.http.r4-1
= pcre2.sub("abc", "123abc",
"a${A234567890123456789_123456789012}z");
set resp.http.r4-2
= pcre2.sub("abc", "123abc",
"a${A23456789012345678901234567890123}z");
set resp.http.r4-3 = pcre2.sub("abc", "123abc", "a${bcd");
set resp.http.r4-4 = pcre2.sub("abc", "123abc", "a${b+d}z");
set resp.http.r5-1
= pcre2.sub("a(?:(b)|(c))", "ab", "X${1:+1:-1}X${2:+2:-2}",
sub_extended=true);
set resp.http.r5-2
= pcre2.sub("a(?:(b)|(c))", "ac", "X${1:+1:-1}X${2:+2:-2}",
sub_extended=true);
set resp.http.r5-3
= pcre2.sub("a(?:(b)|(c))", "ab", "${1:+$1\:$1:$2}",
sub_extended=true);
set resp.http.r5-4
= pcre2.sub("a(?:(b)|(c))", "ac", "${1:+$1\:$1:$2}",
sub_extended=true);
set resp.http.r5-5
= pcre2.sub("a(?:(b)|(c))", "ab", "X${1:-1:-1}X${2:-2:-2}",
sub_extended=true);
set resp.http.r5-6
= pcre2.sub("a(?:(b)|(c))", "ac", "X${1:-1:-1}X${2:-2:-2}",
sub_extended=true);
set resp.http.r6
= pcre2.sub("(a)", "a", ">${1:+\Q$1:{}$$\E+\U$1}<",
sub_extended=true);
set resp.http.r7-1
= pcre2.sub("X(b)Y", "XbY", "x${1:+$1\U$1}y", sub_extended=true);
set resp.http.r7-2
= pcre2.sub("X(b)Y", "XbY", "\Ux${1:+$1$1}y", sub_extended=true);
set resp.http.r8
= pcre2.sub("(a)", "a", "${*MARK:+a:b}", sub_extended=true);
set resp.http.r9
= pcre2.sub("(abcd)", "abcd", "${1:+xy\kz}", sub_extended=true);
set resp.http.r10-1
= pcre2.sub("abcd", "abcd", ">$1<", sub_extended=true);
set resp.http.r10-2
= pcre2.sub("abcd", "abcd", ">xxx${xyz}<<<", sub_extended=true);
set resp.http.r11-1
= pcre2.sub("a|(b)c", "cat", ">${2:-xx}<", sub_extended=true);
set resp.http.r11-2
= pcre2.sub("a|(b)c", "cat", ">${2:-xx}<", sub_extended=true,
unknown_unset=true);
set resp.http.r11-3
= pcre2.sub("a|(b)c", "cat", ">${X:-xx}<", sub_extended=true,
unknown_unset=true);
}
# vcl_synth for a test in sub.vtc that requires CRs in the subject
# string, because vcl.inline called during varnishtest replaces them
# with "\r" (a backslash followed by 'r').
sub vcl_synth {
if (req.http.test == "1") {
set resp.body
= pcre2.sub("^$", multiline=true, newline=ANYCRLF,
suball=true, replacement="-", subject={"X
Y"});
}
elsif (req.http.test == "2") {
set resp.body
= pcre2.sub("^$", multiline=true, newline=CRLF, suball=true,
replacement="-", subject={"X
Y"});
}
elsif (req.http.test == "3") {
set resp.body
= pcre2.sub("^$", multiline=true, newline=ANY, suball=true,
replacement="-", subject={"X
Y"});
}
elsif (req.http.test == "4") {
set resp.body
= pcre2.sub("(*ANYCRLF)(?m)^(.*[^0-9\r\n].*|)$",
suball=true, replacement="NaN", subject={"15
foo
20
bar
baz
20"});
}
else {
set resp.status = 500;
}
return(deliver);
}
# This line is included from sub_utf8.vtc.yes, run as sub_utf8.vtc
# when Unicode is enabled for libpcre2. The extended syntax for pcre2
# substitutions includes the form ${...}, which varnishtest always
# interprets as a macro in a vtc script. So this snippet is included
# as a workaround.
set resp.http.r5 = pcre2.sub("((?<digit>\d)|(?<letter>\p{L}))", "ab12cde",
suball=true, utf=true, replacement=
"<${digit:+digit; :not digit; }${letter:+letter:not a letter}>",
sub_extended=true);
......@@ -60,3 +60,61 @@ client c1 {
expect resp.http.r5 == "<not digit; letter><not digit; letter><digit; not a letter><digit; not a letter><not digit; letter><not digit; letter><not digit; letter>"
expect resp.http.r6 == ""
} -run
logexpect l1 -v v1 -d 1 -g vxid -q "VCL_Error" {
expect 0 * Begin req
expect * = VCL_Error "^vmod pcre2 error: in r6.sub..: UTF-8 error: 1 byte missing at end"
expect * = End
} -run
# The same tests with the sub() function.
varnish v1 -vcl {
import pcre2 from "${vmod_topbuild}/src/.libs/libvmod_pcre2.so";
backend b { .host = "${bad_ip}"; }
sub vcl_recv {
return(synth(200));
}
sub vcl_synth {
set resp.http.r1 = pcre2.sub("ábc", "123ábc123",
"XሴZ", utf=true);
set resp.http.r2 = pcre2.sub("(?<=abc)(|def)",
"123abcáyzabcdef789abcሴqr",
"<$0>", utf=true, suball=true);
set resp.http.r3 = pcre2.sub("abcd", "abcd",
"x\x{824}y\o{3333}z(\Q12\$34$$\x34\E5$$)", utf=true,
sub_extended=true);
set resp.http.r4 = pcre2.sub("a(\x{e0}\x{101})(\x{c0}\x{102})",
"aàāÀĂ",
"a\u$1\U$1\E$1\l$2\L$2\Eab\U\x{e0}\x{101}\L\x{d0}\x{160}\EDone",
utf=true, sub_extended=true);
include "${vmod_topbuild}/src/tests/sub_func_utf8_macro.vcl";
set resp.http.r6 = pcre2.sub("abc", "abc", "", utf=true);
return(deliver);
}
}
logexpect l1 -v v1 -d 0 -g vxid -q "VCL_Error" {
expect 0 * Begin req
expect * = VCL_Error "^vmod pcre2 error: in pcre2.sub..: UTF-8 error: 1 byte missing at end"
expect * = End
} -start
client c1 {
txreq
rxresp
expect resp.status == "200"
expect resp.http.r1 == "123XሴZ123"
expect resp.http.r2 == "123abc<>áyzabc<><def>789abc<>ሴqr"
expect resp.http.r3 == {xࠤyۛz(12\$34$$\x345$)}
expect resp.http.r4 == {aÀāÀĀàāàĂàăabÀĀðšDone}
expect resp.http.r5 == "<not digit; letter><not digit; letter><digit; not a letter><digit; not a letter><not digit; letter><not digit; letter><not digit; letter>"
expect resp.http.r6 == ""
} -run
logexpect l1 -wait
......@@ -131,9 +131,14 @@
VCL_BOOL notempty, VCL_BOOL notempty_atstart, \
VCL_BOOL no_jit, VCL_BOOL no_utf_check
/*
 * Argument lists for functions declared with (MATCH_FLAGS, SUB_OPTS).
 * Both lists must have the same number of entries:
 * MATCH_SUB_FLAGS_PARAMS forwards the caller's substitution options,
 * MATCH_FLAGS_PARAMS passes 0 for each of them (for callers that have
 * no substitution options, such as plain matches).
 *
 * The last line of each definition must NOT end in a backslash, or the
 * following line gets spliced into the macro body.
 */
#define MATCH_SUB_FLAGS_PARAMS						\
	anchored, notbol, noteol, notempty, notempty_atstart, no_jit,	\
	no_utf_check, suball, sub_extended, unknown_unset,		\
	unset_empty

#define MATCH_FLAGS_PARAMS						\
	anchored, notbol, noteol, notempty, notempty_atstart, no_jit,	\
	no_utf_check, 0, 0, 0, 0
#define SUB_OPTS \
VCL_BOOL suball, VCL_BOOL sub_extended, VCL_BOOL unknown_unset, \
......@@ -409,6 +414,61 @@ get_match_context(VRT_CTX, MATCH_CTX_OPTS, const char * restrict const context,
return mctx;
}
/*
 * OR the PCRE2 option bit(s) pcre2_opt into *options when the VCL
 * boolean vmod_opt is true; otherwise *options keeps its value.
 */
static inline void
set_opt(uint32_t *options, VCL_BOOL vmod_opt, uint32_t pcre2_opt)
{
	const uint32_t mask = vmod_opt ? pcre2_opt : 0;

	*options |= mask;
}
/*
 * Translate the VCL boolean match and substitution parameters into
 * PCRE2 option bits and OR them into *options. Bits already set in
 * *options are left untouched.
 */
static inline void
set_match_flags(uint32_t *options, MATCH_FLAGS, SUB_OPTS)
{
	/* One entry per VCL parameter, paired with its PCRE2 option. */
	const struct {
		VCL_BOOL	enabled;
		uint32_t	bit;
	} flag_map[] = {
		{ anchored,		PCRE2_ANCHORED },
		{ notbol,		PCRE2_NOTBOL },
		{ noteol,		PCRE2_NOTEOL },
		{ notempty,		PCRE2_NOTEMPTY },
		{ notempty_atstart,	PCRE2_NOTEMPTY_ATSTART },
		{ no_jit,		PCRE2_NO_JIT },
		{ no_utf_check,		PCRE2_NO_UTF_CHECK },
		{ suball,		PCRE2_SUBSTITUTE_GLOBAL },
		{ sub_extended,		PCRE2_SUBSTITUTE_EXTENDED },
		{ unknown_unset,	PCRE2_SUBSTITUTE_UNKNOWN_UNSET },
		{ unset_empty,		PCRE2_SUBSTITUTE_UNSET_EMPTY },
	};
	unsigned i;

	for (i = 0; i < sizeof(flag_map) / sizeof(flag_map[0]); i++)
		set_opt(options, flag_map[i].enabled, flag_map[i].bit);
}
/*
 * Return the match options stored in priv, creating them on first use.
 *
 * If priv->priv is already set, it is cast to a struct match_call and
 * returned as is; the option parameters are ignored in that case.
 * Otherwise a match context is obtained from get_match_context(), a
 * new struct match_call is allocated, the match/substitution flags are
 * translated into PCRE2 option bits, and the object is stored in priv
 * with match_call_free as its finalizer.
 *
 * context and caller are only used to label error messages.
 *
 * Returns NULL if the match context cannot be obtained or if the
 * allocation fails (the latter is reported with VERRNOMEM).
 */
static inline struct match_call *
get_match_opts(VRT_CTX, struct vmod_priv *priv, MATCH_CTX_OPTS, MATCH_FLAGS,
	       SUB_OPTS, const char *context, const char *caller)
{
	struct match_call *match_opts;
	pcre2_match_context *mctx;

	if (priv->priv != NULL) {
		CAST_OBJ(match_opts, priv->priv, VMOD_PCRE2_MATCH_CALL_MAGIC);
		return match_opts;
	}

	mctx = get_match_context(ctx, MATCH_CTX_PARAMS, context, caller);
	if (mctx == NULL)
		return NULL;

	ALLOC_OBJ(match_opts, VMOD_PCRE2_MATCH_CALL_MAGIC);
	if (match_opts == NULL) {
		/*
		 * Ownership of mctx has not been handed over to priv yet,
		 * so release it here rather than leak it.
		 */
		pcre2_match_context_free(mctx);
		VERRNOMEM(ctx, "allocating call-scoped match options in %s%s",
			  context, caller);
		return NULL;
	}

	match_opts->mctx = mctx;
	set_match_flags(&match_opts->match_options, MATCH_SUB_FLAGS_PARAMS);

	priv->priv = match_opts;
	priv->free = match_call_free;
	priv->len = sizeof(*match_opts);
	return match_opts;
}
static inline struct task *
get_task(VRT_CTX, struct vmod_priv *priv_task,
const char * const restrict context,
......@@ -454,13 +514,6 @@ get_task(VRT_CTX, struct vmod_priv *priv_task,
return match_task;
}
static inline void
set_opt(uint32_t *options, VCL_BOOL vmod_opt, uint32_t pcre2_opt)
{
if (vmod_opt)
*options |= pcre2_opt;
}
static inline void
set_compile_flags(uint32_t *options, COMPILE_FLAGS)
{
......@@ -492,18 +545,6 @@ set_compile_flags(uint32_t *options, COMPILE_FLAGS)
set_opt(options, utf, PCRE2_UTF);
}
static inline void
set_match_flags(uint32_t *options, MATCH_FLAGS)
{
set_opt(options, anchored, PCRE2_ANCHORED);
set_opt(options, notbol, PCRE2_NOTBOL);
set_opt(options, noteol, PCRE2_NOTEOL);
set_opt(options, notempty, PCRE2_NOTEMPTY);
set_opt(options, notempty_atstart, PCRE2_NOTEMPTY_ATSTART);
set_opt(options, no_jit, PCRE2_NO_JIT);
set_opt(options, no_utf_check, PCRE2_NO_UTF_CHECK);
}
static pcre2_code *
compile(VRT_CTX, pcre2_compile_context * restrict const cctx,
VCL_STRING const restrict pattern, uint32_t options, int do_jit,
......@@ -733,25 +774,11 @@ vmod_regex_match(VRT_CTX, struct vmod_pcre2_regex *regex,
/* The match context and options are PRIV_CALL-scoped. */
/* XXX param to decide whether these should be saved for the call */
if (priv_call->priv == NULL) {
pcre2_match_context *mctx;
if ((mctx = get_match_context(ctx, MATCH_CTX_PARAMS,
regex->vcl_name, ".match()"))
== NULL)
return 0;
ALLOC_OBJ(match_opts, VMOD_PCRE2_MATCH_CALL_MAGIC);
AN(match_opts);
match_opts->mctx = mctx;
priv_call->priv = match_opts;
priv_call->free = match_call_free;
set_match_flags(&match_opts->match_options, MATCH_FLAGS_PARAMS);
}
else
CAST_OBJ(match_opts, priv_call->priv,
VMOD_PCRE2_MATCH_CALL_MAGIC);
match_opts = get_match_opts(ctx, priv_call, MATCH_CTX_PARAMS,
MATCH_FLAGS_PARAMS, regex->vcl_name,
".match()");
if (match_opts == NULL)
return 0;
/* The match data block is task-scoped for this object only. */
obj_task = VRT_priv_task(ctx, regex);
......@@ -836,33 +863,11 @@ vmod_regex_sub(VRT_CTX, struct vmod_pcre2_regex *regex,
return NULL;
}
if (priv_call->priv == NULL) {
pcre2_match_context *mctx;
if ((mctx = get_match_context(ctx, MATCH_CTX_PARAMS,
regex->vcl_name, ".sub()"))
== NULL)
return NULL;
ALLOC_OBJ(match_opts, VMOD_PCRE2_MATCH_CALL_MAGIC);
AN(match_opts);
match_opts->mctx = mctx;
priv_call->priv = match_opts;
priv_call->free = match_call_free;
set_match_flags(&match_opts->match_options, MATCH_FLAGS_PARAMS);
set_opt(&match_opts->match_options, suball,
PCRE2_SUBSTITUTE_GLOBAL);
set_opt(&match_opts->match_options, sub_extended,
PCRE2_SUBSTITUTE_EXTENDED);
set_opt(&match_opts->match_options, unknown_unset,
PCRE2_SUBSTITUTE_UNKNOWN_UNSET);
set_opt(&match_opts->match_options, unset_empty,
PCRE2_SUBSTITUTE_UNSET_EMPTY);
}
else
CAST_OBJ(match_opts, priv_call->priv,
VMOD_PCRE2_MATCH_CALL_MAGIC);
/*
 * Match context and options are kept in priv_call. Errors are labelled
 * with this method's name, and failure is signalled with NULL because
 * this function returns a string.
 */
match_opts = get_match_opts(ctx, priv_call, MATCH_CTX_PARAMS,
			    MATCH_SUB_FLAGS_PARAMS, regex->vcl_name,
			    ".sub()");
if (match_opts == NULL)
	return NULL;
/* XXX mdata in PRIV_CALL? */
if ((match_task = get_task(ctx, priv_task, regex->vcl_name, ".sub()"))
......@@ -1032,6 +1037,120 @@ vmod_namedref(VRT_CTX, struct vmod_priv *task, VCL_STRING name,
"namedref", "**NAMEDREF FUNCTION FAILED**");
}
/*
 * VCL function pcre2.sub(): compile pattern, apply it to subject and
 * return the result of substituting replacement for the match(es).
 *
 * The compile/match contexts and the option bits derived from the
 * boolean parameters are created on the first invocation and stored in
 * priv_call; later invocations reuse them. The pattern itself is
 * compiled on every invocation and freed before returning.
 *
 * Returns:
 *  - the substituted string, written into the workspace, when at least
 *    one substitution was made;
 *  - subject itself (or "" if subject was NULL) when nothing matched;
 *  - NULL on any error, after the error has been reported.
 */
VCL_STRING
vmod_sub(VRT_CTX, struct vmod_priv *priv_call, struct vmod_priv *priv_task,
	 VCL_STRING pattern, VCL_STRING subject, VCL_STRING replacement,
	 COMPILE_OPTS, MATCHF_OPTS, SUB_OPTS)
{
	pcre2_match_data *mdata;
	struct task *match_task = NULL;
	struct match_call *match_opts;
	pcre2_code *code;
	int ret;
	PCRE2_SIZE bytes;
	PCRE2_UCHAR *buf;
	char *msg;
	uintptr_t snap;

	CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
	AN(priv_task);
	AN(priv_call);

	if (pattern == NULL) {
		ERR(ctx, "pattern is undefined in pcre2.sub()");
		return NULL;
	}
	if (replacement == NULL) {
		ERR(ctx, "replacement is undefined in pcre2.sub()");
		return NULL;
	}

	if (priv_call->priv == NULL) {
		pcre2_compile_context *cctx;
		pcre2_match_context *mctx;

		if ((cctx = get_compile_context(ctx, COMPILE_CTX_PARAMS,
						"pcre2", ".sub()")) == NULL)
			return NULL;
		if ((mctx = get_match_context(ctx, MATCH_CTX_PARAMS, "pcre2",
					      ".sub()")) == NULL) {
			/* cctx is not owned by priv_call yet; don't leak it. */
			pcre2_compile_context_free(cctx);
			return NULL;
		}
		ALLOC_OBJ(match_opts, VMOD_PCRE2_MATCH_CALL_MAGIC);
		AN(match_opts);
		match_opts->cctx = cctx;
		match_opts->mctx = mctx;
		priv_call->priv = match_opts;
		priv_call->free = match_call_free;
		set_compile_flags(&match_opts->compile_options,
				  COMPILE_FLAGS_PARAMS);
		set_match_flags(&match_opts->match_options, MATCH_FLAGS_PARAMS);
		set_opt(&match_opts->match_options, suball,
			PCRE2_SUBSTITUTE_GLOBAL);
		set_opt(&match_opts->match_options, sub_extended,
			PCRE2_SUBSTITUTE_EXTENDED);
		set_opt(&match_opts->match_options, unknown_unset,
			PCRE2_SUBSTITUTE_UNKNOWN_UNSET);
		set_opt(&match_opts->match_options, unset_empty,
			PCRE2_SUBSTITUTE_UNSET_EMPTY);
	}
	else
		CAST_OBJ(match_opts, priv_call->priv,
			 VMOD_PCRE2_MATCH_CALL_MAGIC);

	if ((code = compile(ctx, match_opts->cctx, pattern,
			    match_opts->compile_options, have_jit && !no_jit,
			    "pcre2", ".sub()")) == NULL)
		return NULL;

	/* From here on, code must be freed on every return path. */

	/* XXX mdata in PRIV_CALL? */
	if ((match_task = get_task(ctx, priv_task, "pcre2", ".sub()"))
	    == NULL) {
		pcre2_code_free(code);
		return NULL;
	}

	/*
	 * NOTE(review): mdata is never passed to pcre2_match_data_free()
	 * in this function; presumably its memory is managed through
	 * match_task->gctx -- confirm.
	 */
	mdata = pcre2_match_data_create_from_pattern(code, match_task->gctx);
	if (mdata == NULL) {
		pcre2_code_free(code);
		ERRNOMEM(ctx, "initializing match data in pcre2.sub()");
		return NULL;
	}

	/*
	 * Don't need to ensure that the subject is in workspace, as we do
	 * with matches, because we won't be retrieving backrefs, and we
	 * give pcre2 the rest of the workspace to write the substitution.
	 */
	if (subject == NULL)
		subject = "";
	if (len == 0)
		len = PCRE2_ZERO_TERMINATED;
	buf = (PCRE2_UCHAR *) WS_Front(ctx->ws);
	bytes = (PCRE2_SIZE) WS_Reserve(ctx->ws, 0);

	/* XXX param for start_offset */
	ret = pcre2_substitute(code, (PCRE2_SPTR)subject, len, 0,
			       match_opts->match_options, mdata,
			       match_opts->mctx, (PCRE2_SPTR)replacement,
			       PCRE2_ZERO_TERMINATED, buf, &bytes);
	pcre2_code_free(code);

	if (ret > 0) {
		/* Keep the result plus its terminating NUL byte. */
		WS_Release(ctx->ws, bytes + 1);
		return (VCL_STRING)buf;
	}

	/* Nothing in the reserved space is kept on the remaining paths. */
	WS_Release(ctx->ws, 0);
	if (ret == 0)
		return subject;
	if (ret == PCRE2_ERROR_NOMEMORY) {
		ERRNOMEM(ctx, "allocating substitution result in pcre2.sub()");
		return NULL;
	}

	/* The message prefix is only needed while reporting the error. */
	snap = WS_Snapshot(ctx->ws);
	if ((msg = WS_Printf(ctx->ws, "in pcre2.sub()")) == NULL)
		msg = "";
	report_pcre2_err(ctx, ret, msg, "");
	WS_Reset(ctx->ws, snap);
	return NULL;
}
/* Functions */
VCL_BOOL
......
......@@ -77,6 +77,27 @@ $Function STRING backref(PRIV_TASK, INT ref,
$Function STRING namedref(PRIV_TASK, STRING name,
STRING fallback = "**NAMEDREF FUNCTION FAILED**")
$Function STRING sub(PRIV_CALL, PRIV_TASK, STRING pattern, STRING subject,
STRING replacement, BOOL allow_empty_class=0,
BOOL anchored=0, ENUM {ANYCRLF, UNICODE} bsr=0,
BOOL alt_bsux=0, BOOL alt_circumflex=0,
BOOL alt_verbnames=0, BOOL caseless=0,
BOOL dollar_endonly=0, BOOL dotall=0, BOOL dupnames=0,
BOOL extended=0, BOOL firstline=0, STRING locale=0,
BOOL match_unset_backref=0, INT max_pattern_len=0,
BOOL multiline=0, BOOL never_backslash_c=0,
BOOL never_ucp=0, BOOL never_utf=0,
ENUM {CR, LF, CRLF, ANYCRLF, ANY} newline=0,
BOOL no_auto_capture=0, BOOL no_auto_possess=0,
BOOL no_dotstar_anchor=0, BOOL no_start_optimize=0,
BOOL no_utf_check=0, INT parens_nest_limit=0, BOOL ucp=0,
BOOL ungreedy=0, BOOL use_offset_limit=0, BOOL utf=0,
INT len=0, INT match_limit=0, INT offset_limit=0,
BOOL notbol=0, BOOL noteol=0, BOOL notempty=0,
BOOL notempty_atstart=0, BOOL no_jit=0,
INT recursion_limit=0, BOOL suball=0, BOOL sub_extended=0,
BOOL unknown_unset=0, BOOL unset_empty=0)
$Function BOOL config_bool(ENUM {JIT, STACKRECURSE, UNICODE})
$Function STRING config_str(ENUM {BSR, JITTARGET, NEWLINE, UNICODE_VERSION,
......