Add .foreach_body()

We can now iterate over bodies, calling a subroutine for each
match.
parent 321a5f05
......@@ -19,9 +19,9 @@ vmod_re Changes
NEXT
----
* The ``xregex.foreach()`` method has been added to call a VCL
subroutine on all matches, which can then use ``xregex.backref()``
to access them. Example::
* The ``xregex.foreach()`` and ``xregex.foreach_body()`` methods have
been added to call a VCL subroutine on all matches, which can then
use ``xregex.backref()`` to access them. Example::
sub myregex_collect {
set resp.http.all += myregex.backref(0);
......@@ -32,6 +32,8 @@ NEXT
myregex.foreach(req.http.input, myregex_collect);
}
``xregex.foreach_body()`` works the same way, but matches against a
request, backend request or response body instead of a string.
* With the optional ``asfilter`` argument to ``re.regex()``, a varnish
filter is created, which can be used to run regular expression
......
......@@ -33,6 +33,8 @@ SYNOPSIS
# Iterators
BOOL <obj>.foreach(STRING, SUB [, INT limit] [, INT limit_recursion])
BOOL <obj>.foreach_body(req_body | bereq_body | resp_body, SUB
[, INT limit] [, INT limit_recursion])
# filter interface (includes all of the above)
new <obj> = re.regex(STRING [, INT limit] [, INT limit_recursion]
......@@ -324,6 +326,58 @@ Example::
# first value as v1, and the second parameter and value as n2
# and v2
.. _xregex.foreach_body():
BOOL xregex.foreach_body(ENUM which, SUB sub, INT limit, INT limit_recursion)
-----------------------------------------------------------------------------
::
BOOL xregex.foreach_body(
ENUM {req_body, bereq_body, resp_body} which,
SUB sub,
INT limit=0,
INT limit_recursion=0
)
Description
Calls subroutine *sub* once for every match on the given body, as
if `xregex.match()`_ had been run for that match. If there are no
matches, the subroutine is not called. Within the subroutine,
`xregex.backref()`_ can be used to retrieve the match constituents.
See also `xregex.match_body()`_.
Example::
# for key=value separated by &, collect two a and/or b key pairs
#
# sample output: a=1,b=22;b=333,a=4444;
#
sub vcl_init {
new pattern = re.regex("(?:^|&)(a|b)=([^&]*).*?&(a|b)=([^&]*)",
forbody=true);
}
sub collect {
set resp.http.all +=
pattern.backref(1) + "=" + pattern.backref(2) + "," +
pattern.backref(3) + "=" + pattern.backref(4) + ";";
}
sub vcl_synth {
unset resp.http.all;
if (pattern.foreach_body(req_body, collect)) {
set resp.status = 200;
}
return (deliver);
}
sub vcl_recv {
return (synth(400));
}
.. _xregex.backref():
STRING xregex.backref(INT, STRING fallback)
......
varnishtest ".foreach_body(*req_body)"
# Exercises xregex.foreach_body() on request bodies: every request is
# answered from vcl_synth, and the response status / "all" header show
# whether and what the regex matched in the request body.
varnish v1 -vcl {
import re from "${vmod_topbuild}/src/.libs/libvmod_re.so";
import std;
backend none none;
sub vcl_init {
# limit is the minimum for which the last client req passes
new pattern = re.regex("(?:^|&)(a|b)=([^&]*).*?&(a|b)=([^&]*)",
forbody=true, limit=1749);
}
# Called by foreach_body() for each match: appends one
# "<key>=<value>,<key>=<value>;" entry built from backrefs 1-4.
sub collect {
set resp.http.all +=
pattern.backref(1) + "=" + pattern.backref(2) + "," +
pattern.backref(3) + "=" + pattern.backref(4) + ";";
}
# The synthetic response starts out as 400 (see vcl_recv) and is
# turned into a 200 only if foreach_body() returns true.
sub vcl_synth {
unset resp.http.all;
if (pattern.foreach_body(req_body, collect)) {
set resp.status = 200;
}
return (deliver);
}
sub vcl_recv {
return (synth(400));
}
} -start
client c1 -repeat 4 {
# no request body at all: status stays 400
txreq
rxresp
expect resp.status == 400
# body present, but the pattern does not match: status stays 400
txreq -body "wontmatch"
rxresp
expect resp.status == 400
# simple case - all in one, C-l
txreq -body "saldkhaskdhsaks&a=123&sadsadjhsakdh82378e3d&b=43875643543"
rxresp
expect resp.status == 200
expect resp.http.all == "a=123,b=43875643543;"
# chunked body: two matches, each split across chunk boundaries
txreq -nolen -hdr "Transfer-encoding: chunked"
chunked "a="
chunked "12"
chunked "3&sadsadjhsakdh82378e3d&b=43"
chunked "875643543&"
chunked "saldkhaskdhs"
chunked "aks&a="
chunked "45"
chunked "6&sadsadjhsakdh82378e3d&b=78"
chunked "2103879213"
chunkedlen 0
rxresp
expect resp.status == 200
expect resp.http.all == "a=123,b=43875643543;a=456,b=782103879213;"
# chunked body: keys and values sent in tiny chunks, with large
# chunkedlen chunks between the pairs (this is the request the
# limit in vcl_init is tuned for)
txreq -nolen -hdr "Transfer-encoding: chunked"
chunked "&&&"
chunked "a"
chunked "="
chunked "1"
chunked "&"
chunkedlen 1745
chunked "&"
chunked "b"
chunked "="
chunked "2"
chunked "2"
chunked "&"
chunkedlen 12352
chunked "&"
chunked "b"
chunked "="
chunked "3"
chunked "3"
chunked "3"
chunked "&"
chunkedlen 1745
chunked "&"
chunked "a"
chunked "="
chunked "4"
chunked "4"
chunked "4"
chunked "4"
chunkedlen 0
rxresp
expect resp.status == 200
expect resp.http.all == "a=1,b=22;b=333,a=4444;"
} -run
......@@ -491,6 +491,7 @@ struct re_iter_priv {
int s;
int ret;
VRT_CTX;
VCL_SUB sub;
const vre_t *vre;
PCRE2_SIZE startoffset;
struct vmod_priv *task;
......@@ -754,11 +755,53 @@ vmod_regex_match_body(VRT_CTX, struct vmod_re_regex *re, VCL_ENUM which,
CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
INIT_OBJ(reip, RE_ITER_PRIV_MAGIC);
reip->ctx = ctx;
AZ(reip->sub);
reip->vre_limits = get_limits(re, &buf, limit, limit_recursion);
return (reip_match_body(reip, re, match_iter_f, which));
}
/*
 * Body iterator callback (objiterate_f) behind .foreach_body().
 *
 * Hands the current chunk to reip_iter() and, for as long as reip_iter()
 * returns 0 and the state's `s` field is above PCRE2_ERROR_NOMATCH, calls
 * the VCL subroutine stored in the iterator state and asks reip_iter()
 * again with the same arguments.
 *
 * Returns the result of the last reip_iter() call.
 *
 * NOTE(review): the subroutine could set a VCL handling (e.g. by failing);
 * confirm that calling VRT_call() again afterwards is acceptable.
 */
static int v_matchproto_(objiterate_f)
foreach_iter_f(void *priv, unsigned flush, const void *ptr, ssize_t len)
{
	struct re_iter_priv *state;
	int rc;

	CAST_OBJ_NOTNULL(state, priv, RE_ITER_PRIV_MAGIC);

#ifdef ITERDBG
	VSLb(state->ctx->vsl, SLT_Debug, "foreach_iter_f "
	    "flush=%u, s=%d, ptr=%.*s, len=%zd",
	    flush, state->s, (int)len, (const char *)ptr, len);
#endif

	for (rc = reip_iter(state, flush, ptr, len);
	    rc == 0 && state->s > PCRE2_ERROR_NOMATCH;
	    rc = reip_iter(state, flush, ptr, len)) {
		/* one subroutine call per iteration that passed the check */
		VRT_call(state->ctx, state->sub);
	}

	return (rc);
}
/*
 * Implementation of the .foreach_body() method.
 *
 * Sets up a fresh iterator state carrying the VCL context, the subroutine
 * to call and the match limits obtained from get_limits(), then lets
 * reip_match_body() process the body selected by `which` with
 * foreach_iter_f as the per-chunk callback.
 *
 * `sub` must not be NULL. The return value is whatever reip_match_body()
 * returns.
 */
VCL_BOOL
vmod_regex_foreach_body(VRT_CTX, struct vmod_re_regex *re, VCL_ENUM which,
    VCL_SUB sub, VCL_INT limit, VCL_INT limit_recursion)
{
	struct vre_limits limits_buf;
	struct re_iter_priv state;

	CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
	AN(sub);

	INIT_OBJ(&state, RE_ITER_PRIV_MAGIC);
	state.ctx = ctx;
	state.sub = sub;
	state.vre_limits =
	    get_limits(re, &limits_buf, limit, limit_recursion);

	return (reip_match_body(&state, re, foreach_iter_f, which));
}
VCL_STRING
vmod_regex_backref(VRT_CTX, struct vmod_re_regex *re, VCL_INT refnum,
VCL_STRING fallback)
......
......@@ -29,6 +29,8 @@ SYNOPSIS
# Iterators
BOOL <obj>.foreach(STRING, SUB [, INT limit] [, INT limit_recursion])
BOOL <obj>.foreach_body(req_body | bereq_body | resp_body, SUB
[, INT limit] [, INT limit_recursion])
# filter interface (includes all of the above)
new <obj> = re.regex(STRING [, INT limit] [, INT limit_recursion]
......@@ -279,6 +281,47 @@ Example::
# first value as v1, and the second parameter and value as n2
# and v2
$Method BOOL .foreach_body(ENUM {req_body, bereq_body, resp_body } which,
SUB sub, INT limit=0, INT limit_recursion=0)
Description
Calls subroutine *sub* once for every match on the given body, as
if `xregex.match()`_ had been run for that match. If there are no
matches, the subroutine is not called. Within the subroutine,
`xregex.backref()`_ can be used to retrieve the match constituents.
See also `xregex.match_body()`_.
Example::
# for key=value separated by &, collect two a and/or b key pairs
#
# sample output: a=1,b=22;b=333,a=4444;
#
sub vcl_init {
new pattern = re.regex("(?:^|&)(a|b)=([^&]*).*?&(a|b)=([^&]*)",
forbody=true);
}
sub collect {
set resp.http.all +=
pattern.backref(1) + "=" + pattern.backref(2) + "," +
pattern.backref(3) + "=" + pattern.backref(4) + ";";
}
sub vcl_synth {
unset resp.http.all;
if (pattern.foreach_body(req_body, collect)) {
set resp.status = 200;
}
return (deliver);
}
sub vcl_recv {
return (synth(400));
}
$Method STRING .backref(INT, STRING fallback="**BACKREF METHOD FAILED**")
Description
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment