Commit f81d9f81 by Geoff Simmons

Add the sub() method.

Still with plenty of repeated code to be factored out.
parent 3703c0b0
Pipeline #248 skipped
......@@ -84,6 +84,15 @@ regex.namedref
STRING regex.namedref(STRING name, STRING fallback="**NAMEDREF METHOD FAILED**")
.. _func_regex.sub:
regex.sub
---------
::
STRING regex.sub(PRIV_CALL, PRIV_TASK, STRING subject, STRING replacement, INT len=0, BOOL anchored=0, INT match_limit=0, INT offset_limit=0, BOOL notbol=0, BOOL noteol=0, BOOL notempty=0, BOOL notempty_atstart=0, BOOL no_jit=0, BOOL no_utf_check=0, INT recursion_limit=0, BOOL suball=0, BOOL sub_extended=0, BOOL unknown_unset=0, BOOL unset_empty=0)
.. _func_match:
match
......
# vcl_synth for a test in sub.vtc that requires use of the ${...}
# substitution syntax. These cannot be used directly in a vtc, since
# varnishtest takes them for unknown macros and rejects the test.
#
# Each result is stored in a response header named after the regex
# object (r1 ... r11) and the case number, so that the test can check
# every substitution individually.
sub vcl_synth {
# r1: group references by number, in both the $n and ${n} forms.
set resp.http.r1-1 = r1.sub({""abcde""}, "X$1Y${2}Z");
set resp.http.r1-2
= r1.sub({""abcde-abcde""}, "X$1Y${2}Z", suball=true);
# r2: group references by name, in both the $name and ${name} forms.
set resp.http.r2-1 = r2.sub({""abcde""}, "X$ONE+${TWO}Z");
set resp.http.r2-2
= r2.sub({""abcde-abcde-""}, "X$ONE+${TWO}Z", suball=true);
# r3: ${*MARK} in the replacement, with and without suball.
set resp.http.r3-1
= r3.sub("apple lemon blackberry", "${*MARK}", suball=true);
set resp.http.r3-2 = r3.sub("apple strudel", "${*MARK}", suball=true);
set resp.http.r3-3 = r3.sub("fruitless", "${*MARK}", suball=true);
set resp.http.r3-4 = r3.sub("apple lemon blackberry", "${*MARK} sauce");
# r4: a 32-character group name, a 33-character group name, an
# unterminated ${ and a name containing '+'.
set resp.http.r4-1
= r4.sub("123abc", "a${A234567890123456789_123456789012}z");
set resp.http.r4-2
= r4.sub("123abc", "a${A23456789012345678901234567890123}z");
set resp.http.r4-3 = r4.sub("123abc", "a${bcd");
set resp.http.r4-4 = r4.sub("123abc", "a${b+d}z");
# r5: the ${n:+...:...} and ${n:-...} forms, which need sub_extended.
set resp.http.r5-1
= r5.sub("ab", "X${1:+1:-1}X${2:+2:-2}", sub_extended=true);
set resp.http.r5-2
= r5.sub("ac", "X${1:+1:-1}X${2:+2:-2}", sub_extended=true);
set resp.http.r5-3 = r5.sub("ab", "${1:+$1\:$1:$2}", sub_extended=true);
set resp.http.r5-4 = r5.sub("ac", "${1:+$1\:$1:$2}", sub_extended=true);
set resp.http.r5-5
= r5.sub("ab", "X${1:-1:-1}X${2:-2:-2}", sub_extended=true);
set resp.http.r5-6
= r5.sub("ac", "X${1:-1:-1}X${2:-2:-2}", sub_extended=true);
# r6, r7: the \Q...\E and \U escapes inside and outside of ${n:+...}.
set resp.http.r6
= r6.sub("a", ">${1:+\Q$1:{}$$\E+\U$1}<", sub_extended=true);
set resp.http.r7-1 = r7.sub("XbY", "x${1:+$1\U$1}y", sub_extended=true);
set resp.http.r7-2 = r7.sub("XbY", "\Ux${1:+$1$1}y", sub_extended=true);
# r8, r9: ${*MARK} combined with :+, and a \k escape inside ${n:+...}.
set resp.http.r8 = r8.sub("a", "${*MARK:+a:b}", sub_extended=true);
set resp.http.r9 = r9.sub("abcd", "${1:+xy\kz}", sub_extended=true);
# r10: a numbered reference and a reference to the name "xyz".
set resp.http.r10-1 = r10.sub("abcd", ">$1<", sub_extended=true);
set resp.http.r10-2
= r10.sub("abcd", ">xxx${xyz}<<<", sub_extended=true);
# r11: the same ${...:-xx} replacement without and with unknown_unset,
# for a group number and for a group name.
set resp.http.r11-1 = r11.sub("cat", ">${2:-xx}<", sub_extended=true);
set resp.http.r11-2 = r11.sub("cat", ">${2:-xx}<", sub_extended=true,
unknown_unset=true);
set resp.http.r11-3 = r11.sub("cat", ">${X:-xx}<", sub_extended=true,
unknown_unset=true);
}
# vcl_synth for a test in sub.vtc that requires literal CRs in the
# subject strings. They cannot be written in the vtc itself, because
# vcl.inline, as called during varnishtest, replaces them with "\r"
# (a backslash followed by 'r').
#
# The request header "test" selects which regex object (r1 ... r4) is
# exercised; the substitution result becomes the response body. Any
# other value of the header yields a 500 response.
sub vcl_synth {
if (req.http.test == "1") {
set resp.body
= r1.sub(suball=true, replacement="-", subject={"X
Y"});
}
elsif (req.http.test == "2") {
set resp.body
= r2.sub(suball=true, replacement="-", subject={"X
Y"});
}
elsif (req.http.test == "3") {
set resp.body
= r3.sub(suball=true, replacement="-", subject={"X
Y"});
}
elsif (req.http.test == "4") {
set resp.body
= r4.sub(suball=true, replacement="NaN", subject={"15
foo
20
bar
baz
20"});
}
else {
set resp.status = 500;
}
return(deliver);
}
......@@ -111,7 +111,7 @@
VCL_INT offset_limit, VCL_BOOL notbol, VCL_BOOL noteol, \
VCL_BOOL notempty, VCL_BOOL notempty_atstart, \
VCL_BOOL no_jit, VCL_BOOL no_utf_check, \
VCL_INT recursion_limit \
VCL_INT recursion_limit
/* Doesn't repeat the anchored and no_utf_check options */
#define MATCHF_OPTS \
......@@ -135,6 +135,10 @@
anchored, notbol, noteol, notempty, notempty_atstart, no_jit, \
no_utf_check
/*
 * Parameter declarations for the substitution-specific boolean options
 * of the sub() method; used in the signature of vmod_regex_sub() after
 * MATCH_OPTS.
 */
#define SUB_OPTS			\
	VCL_BOOL suball,		\
	VCL_BOOL sub_extended,		\
	VCL_BOOL unknown_unset,		\
	VCL_BOOL unset_empty
struct vmod_pcre2_regex {
unsigned magic;
#define VMOD_PCRE2_REGEX_MAGIC 0x3adb2a78
......@@ -807,6 +811,109 @@ vmod_regex_namedref(VRT_CTX, struct vmod_pcre2_regex *regex, VCL_STRING name,
regex->vcl_name, "namedref", "**NAMEDREF METHOD FAILED**");
}
/*
 * Implementation of the regex.sub() method: run pcre2_substitute() with
 * the object's compiled pattern against subject, using replacement as
 * the substitution template.
 *
 * The match context and the match/substitute option bits are computed
 * from the arguments only on the first invocation at a given call site
 * (while priv_call->priv is NULL) and are cached in priv_call; later
 * invocations from that call site reuse the cached values.
 *
 * subject      string to search; NULL is treated as the empty string
 * replacement  substitution template; must not be NULL
 * len          length of the subject to consider, 0 meaning "up to the
 *              terminating NUL"; must not be negative
 *
 * Returns:
 *  - the substituted string, written into the workspace, if
 *    pcre2_substitute() returns a value > 0
 *  - the subject pointer itself if pcre2_substitute() returns 0
 *  - NULL after reporting an error in every other case (invalid
 *    arguments, allocation failure, workspace too small, pcre2 error)
 */
VCL_STRING
vmod_regex_sub(VRT_CTX, struct vmod_pcre2_regex *regex,
	       struct vmod_priv *priv_call, struct vmod_priv *priv_task,
	       VCL_STRING subject, VCL_STRING replacement, MATCH_OPTS,
	       SUB_OPTS)
{
	pcre2_match_data *mdata;
	struct task *match_task = NULL;
	struct match_call *match_opts;
	int ret;
	PCRE2_SIZE bytes, subject_len;
	PCRE2_UCHAR *buf;
	char *msg;
	uintptr_t snap;

	CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
	CHECK_OBJ_NOTNULL(regex, VMOD_PCRE2_REGEX_MAGIC);
	AN(priv_task);
	AN(priv_call);

	if (replacement == NULL) {
		VERR(ctx, "replacement is undefined in %s.sub()",
		     regex->vcl_name);
		return NULL;
	}

	/*
	 * len is signed, but pcre2_substitute() takes an unsigned
	 * PCRE2_SIZE. A negative value would be converted to a huge
	 * length and make pcre2 read far beyond the subject, so reject
	 * it up front.
	 */
	if (len < 0) {
		VERR(ctx, "len is negative in %s.sub()", regex->vcl_name);
		return NULL;
	}

	if (priv_call->priv == NULL) {
		pcre2_match_context *mctx;

		/* First invocation at this call site: build and cache. */
		if ((mctx = get_match_context(ctx, MATCH_CTX_PARAMS,
					      regex->vcl_name, ".sub()"))
		    == NULL)
			return NULL;
		ALLOC_OBJ(match_opts, VMOD_PCRE2_MATCH_CALL_MAGIC);
		AN(match_opts);
		match_opts->mctx = mctx;
		priv_call->priv = match_opts;
		priv_call->free = match_call_free;

		set_match_flags(&match_opts->match_options, MATCH_FLAGS_PARAMS);
		set_opt(&match_opts->match_options, suball,
			PCRE2_SUBSTITUTE_GLOBAL);
		set_opt(&match_opts->match_options, sub_extended,
			PCRE2_SUBSTITUTE_EXTENDED);
		set_opt(&match_opts->match_options, unknown_unset,
			PCRE2_SUBSTITUTE_UNKNOWN_UNSET);
		set_opt(&match_opts->match_options, unset_empty,
			PCRE2_SUBSTITUTE_UNSET_EMPTY);
	}
	else {
		CAST_OBJ(match_opts, priv_call->priv,
			 VMOD_PCRE2_MATCH_CALL_MAGIC);
	}

	/* XXX mdata in PRIV_CALL? */
	if ((match_task = get_task(ctx, priv_task, regex->vcl_name, ".sub()"))
	    == NULL)
		return NULL;
	/*
	 * NOTE(review): mdata is never handed to pcre2_match_data_free()
	 * in this function; presumably match_task->gctx allocates from
	 * task-scoped memory that is reclaimed elsewhere -- confirm.
	 */
	mdata = pcre2_match_data_create_from_pattern(regex->code,
						     match_task->gctx);
	if (mdata == NULL) {
		VERRNOMEM(ctx, "initializing match data in %s.sub()",
			  regex->vcl_name);
		return NULL;
	}

	/*
	 * Don't need to ensure that the subject is in workspace, as we do
	 * with matches, because we won't be retrieving backrefs, and we
	 * give pcre2 the rest of the workspace to write the substitution.
	 */
	if (subject == NULL)
		subject = "";
	/* Keep the length in the unsigned type that pcre2 expects. */
	if (len == 0)
		subject_len = PCRE2_ZERO_TERMINATED;
	else
		subject_len = (PCRE2_SIZE)len;

	/*
	 * Reserve all remaining workspace as the output buffer; bytes is
	 * the buffer size on input and is updated by pcre2_substitute().
	 */
	buf = (PCRE2_UCHAR *) WS_Front(ctx->ws);
	bytes = (PCRE2_SIZE) WS_Reserve(ctx->ws, 0);

	/* XXX param for start_offset */
	ret = pcre2_substitute(regex->code, (PCRE2_SPTR)subject, subject_len,
			       0, match_opts->match_options, mdata,
			       match_opts->mctx, (PCRE2_SPTR)replacement,
			       PCRE2_ZERO_TERMINATED, buf, &bytes);

	if (ret > 0) {
		/* Keep the result plus one byte for its terminating NUL. */
		WS_Release(ctx->ws, bytes + 1);
		return (VCL_STRING)buf;
	}

	/* Nothing in the reserved buffer is kept on any other path. */
	WS_Release(ctx->ws, 0);

	/*
	 * NOTE(review): this returns the caller's subject pointer as is,
	 * so a non-zero len does not shorten the result here -- confirm
	 * that this is intended.
	 */
	if (ret == 0)
		return subject;

	if (ret == PCRE2_ERROR_NOMEMORY) {
		VERRNOMEM(ctx, "allocating substitution result in %s.sub()",
			  regex->vcl_name);
		return NULL;
	}

	/*
	 * Any other pcre2 error: format the location message in the
	 * workspace for the report, then roll the workspace back.
	 */
	snap = WS_Snapshot(ctx->ws);
	if ((msg = WS_Printf(ctx->ws, "in %s.sub()", regex->vcl_name)) == NULL)
		msg = "";
	report_pcre2_err(ctx, ret, msg, "");
	WS_Reset(ctx->ws, snap);
	return NULL;
}
/* Functional interface */
VCL_BOOL
......
......@@ -43,6 +43,14 @@ $Method STRING .backref(INT ref, STRING fallback = "**BACKREF METHOD FAILED**")
$Method STRING .namedref(STRING name,
STRING fallback = "**NAMEDREF METHOD FAILED**")
$Method STRING .sub(PRIV_CALL, PRIV_TASK, STRING subject, STRING replacement,
INT len=0, BOOL anchored=0, INT match_limit=0,
INT offset_limit=0, BOOL notbol=0, BOOL noteol=0,
BOOL notempty=0, BOOL notempty_atstart=0, BOOL no_jit=0,
BOOL no_utf_check=0, INT recursion_limit=0, BOOL suball=0,
BOOL sub_extended=0, BOOL unknown_unset=0,
BOOL unset_empty=0)
$Function BOOL match(PRIV_CALL, PRIV_TASK, STRING pattern, STRING subject,
BOOL allow_empty_class=0, BOOL anchored=0,
ENUM {ANYCRLF, UNICODE} bsr=0, BOOL alt_bsux=0,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment