Add parsing of bodies (req.body, bereq.body and resp.body)

parent 076545a6
......@@ -31,7 +31,8 @@ AM_VTC_LOG_FLAGS = \
-p vmod_path="$(abs_builddir)/.libs:$(vmoddir)"
TESTS = \
vtc/vmod_frozen.vtc
vtc/vmod_frozen.vtc \
vtc/readme_example.vtc
# Documentation
......
......@@ -37,6 +37,20 @@
#include "vmod_frozen.h"
#include "frozen/frozen.h"
/*
 * Log a printf-style message under the SLT_VCL_Error tag.
 *
 * The message goes to the task's log buffer (ctx->vsl) when the context
 * has one; otherwise it is logged directly with NO_VXID.
 *
 * Must not be called from a method contained in VCL_MET_TASK_H; this is
 * asserted.
 */
static void
errmsg(VRT_CTX, const char *fmt, ...)
{
	va_list ap;

	AZ(ctx->method & VCL_MET_TASK_H);

	va_start(ap, fmt);
	if (ctx->vsl == NULL) {
		VSLv(SLT_VCL_Error, NO_VXID, fmt, ap);
	} else {
		VSLbv(ctx->vsl, SLT_VCL_Error, fmt, ap);
	}
	va_end(ap);
}
static enum type_e
type_parse(VCL_ENUM e) {
#define VMODENUM(n) if (e == VENUM(n)) return(type_ ## n);
......@@ -298,6 +312,164 @@ vmod_parser_parse(VRT_CTX, struct vmod_frozen_parser *vfpaa,
return (parse(ctx, vfpaa, s, s != NULL ? strlen(s) : 0));
}
/*
 * State shared between vmod_parser_parse_body() and collect_iter_f()
 * while the segments of a body are gathered into one contiguous buffer.
 */
struct collect_iter_priv {
unsigned magic;
#define COLLECT_ITER_PRIV_MAGIC 0x6ddd62cd
/* buffer obtained from realloc(3); NULL unless a copy had to be made */
char *heap;
/*
 * start of the collected body: either the single segment handed to the
 * iterator (no copy made) or the same address as heap
 */
const char *ptr;
/*
 * number of bytes at ptr; if the limit was exceeded, the size which
 * would have been needed (ptr is NULL in that case)
 */
size_t len;
/* upper limit for the number of bytes collected on the heap */
size_t max;
};
/*
 * Body iterator callback: gather the segments passed in into a single
 * contiguous buffer described by the struct collect_iter_priv in priv.
 *
 * priv   the struct collect_iter_priv to update
 * flush  iterator flags for this segment
 * ptr    start of the segment, may be NULL
 * len    length of the segment, may be 0
 *
 * Returns 0 to continue the iteration, or 1 if adding this segment would
 * exceed the configured maximum. In that case any heap copy is released
 * and the length is set to the size which would have been required.
 */
static int v_matchproto_(objiterate_f)
collect_iter_f(void *priv, unsigned flush, const void *ptr, ssize_t len)
{
	struct collect_iter_priv *state;
	char *buf;
	size_t total;

	CAST_OBJ_NOTNULL(state, priv, COLLECT_ITER_PRIV_MAGIC);

	/* empty call: nothing to add */
	if (len == 0 || ptr == NULL)
		return (0);

	/*
	 * Nothing collected so far and this is the final segment: reference
	 * it in place instead of copying.
	 *
	 * flush is compared for equality on purpose: the shortcut must not
	 * be taken when OBJ_ITER_FLUSH is set.
	 */
	if (flush == OBJ_ITER_END && state->ptr == NULL) {
		AZ(state->heap);
		AZ(state->len);
		state->len = len;
		state->ptr = ptr;
		return (0);
	}

	total = state->len + len;

	if (total <= state->max) {
		/* grow the heap copy and append this segment */
		buf = realloc(state->heap, total);
		AN(buf);
		memcpy(buf + state->len, ptr, len);
		state->heap = buf;
		state->ptr = buf;
		state->len = total;
		return (0);
	}

	/* limit exceeded: drop what we have, remember the required size */
	free(state->heap);
	state->heap = NULL;
	state->ptr = NULL;
	state->len = total;
	return (1);
}
/*
 * The heap copy of a body has to stay around until the end of the task,
 * because .extract uses references into it after .parse_body().
 *
 * For that reason, the heap pointer (if there is one) is handed to a
 * priv_task, and this function is the callback which finally frees it.
 */
static void
collect_free(VRT_CTX, void *p)
{

	(void)ctx;
	free(p);
}
/*
 * priv_task methods for a heap copy of a body: collect_free() is the
 * fini callback releasing the copy.
 *
 * Internal to this file and never modified, hence static const.
 */
static const struct vmod_priv_methods collect_priv_methods[1] = {{
	.magic = VMOD_PRIV_METHODS_MAGIC,
	.type = "frozen collect delayed free",
	.fini = collect_free
}};
/*
 * $Method BOOL .parse_body(ENUM {req_body, bereq_body, resp_body} which,
 *	BYTES maxsize)
 *
 * Collect the body selected by which into one contiguous buffer using
 * collect_iter_f() and hand it to parse().
 *
 * ctx    VCL context
 * vfpaa  the parser object, passed through to parse()
 * which  body to parse: req_body, bereq_body or resp_body
 * max    limit for the number of bytes copied to the heap while
 *        collecting the body
 *
 * Returns the result of parse() on success. Returns false after logging
 * a VCL_Error if the selected body is not available, if collecting it
 * exceeds max or fails otherwise, or if no priv_task could be obtained
 * to keep the heap copy alive.
 *
 * If a heap copy had to be made and parsing succeeded, ownership of the
 * copy moves to a priv_task, which frees it through collect_priv_methods.
 */
VCL_BOOL
vmod_parser_parse_body(VRT_CTX,
    struct VPFX(frozen_parser) *vfpaa, VCL_ENUM which, VCL_BYTES max)
{
	struct collect_iter_priv cip[1];
	struct vmod_priv *priv_task;
	VCL_BOOL ret;
	int r;

	CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
	INIT_OBJ(cip, COLLECT_ITER_PRIV_MAGIC);
	cip->max = max;

	if (which == VENUM(req_body)) {
		if (ctx->req == NULL) {
			errmsg(ctx, "xfrozen.parse_body(which = req_body) "
			    "called but no request body found");
			return (0);
		}
		r = VRB_Iterate(ctx->req->wrk, ctx->vsl, ctx->req,
		    collect_iter_f, cip);
	} else if (which == VENUM(bereq_body)) {
		if (ctx->bo != NULL && ctx->bo->bereq_body != NULL) {
			/* the busyobj has a bereq_body object: iterate it */
			r = ObjIterate(ctx->bo->wrk, ctx->bo->bereq_body,
			    cip, collect_iter_f, 0);
		} else if (ctx->bo == NULL || ctx->bo->req == NULL) {
			errmsg(ctx, "xfrozen.parse_body(which = bereq_body) "
			    "called but no backend request body found");
			return (0);
		} else {
			/* otherwise use the body of the request on the bo */
			r = VRB_Iterate(ctx->bo->wrk, ctx->vsl, ctx->bo->req,
			    collect_iter_f, cip);
		}
	} else if (which == VENUM(resp_body)) {
		if (ctx->req == NULL || ctx->req->objcore == NULL) {
			errmsg(ctx, "xfrozen.parse_body(which = resp_body) "
			    "called but no response body found");
			return (0);
		}
		r = ObjIterate(ctx->req->wrk, ctx->req->objcore,
		    cip, collect_iter_f, 0);
	} else {
		WRONG("which in parse_body");
	}

	if (r != 0) {
		if (cip->len > cip->max) {
			/*
			 * collect_iter_f() hit the limit and has already
			 * released its heap copy.
			 *
			 * Print cip->max, which is the size_t matching %zu;
			 * max itself is a VCL_BYTES.
			 */
			errmsg(ctx, "xfrozen.parse_body(maxbytes = %zu) "
			    "exceeded", cip->max);
			AZ(cip->heap);
		} else {
			errmsg(ctx, "xfrozen.parse_body() collect failed %d",
			    r);
			free(cip->heap);
		}
		return (0);
	}

	ret = parse(ctx, vfpaa, cip->ptr, cip->len);
	if (ret == 0) {
		free(cip->heap);
		return (ret);
	}

	/* no heap copy was made: nothing to keep alive */
	if (cip->heap == NULL)
		return (ret);

	/*
	 * .extract uses references into the copy after this call, so it is
	 * handed to a priv_task instead of being freed here. The heap
	 * address serves as the priv_task id.
	 */
	priv_task = VRT_priv_task(ctx, cip->heap);
	if (priv_task == NULL) {
		errmsg(ctx, "xfrozen.parse_body() out of workspace");
		free(cip->heap);
		return (0);
	}
	priv_task->priv = cip->heap;
	priv_task->methods = collect_priv_methods;
	return (ret);
}
static const enum type_e json_type_2_type_e[JSON_TYPES_CNT] = {
[JSON_TYPE_INVALID] = _TYPE_E_INVALID,
[JSON_TYPE_STRING] = type_STRING,
......
......@@ -12,7 +12,9 @@ $Module frozen 3 "JSON parsing with frozen"
DESCRIPTION
===========
This vmod makes available to VCL the _frozen_ JSON parser with low
.. _frozen: https://github.com/cesanta/frozen
This vmod makes available to VCL the `frozen`_ JSON parser with low
overhead: By specifying a set of expected JSON paths, a callback to
the parser is used to track only paths of interest, which can then be
extracted.
......@@ -50,6 +52,30 @@ Example
# ...
}
Example for parsing a request body and returning it in a different
format (as an array)::
import frozen;
import std;
sub vcl_init {
new json = frozen.parser();
json.expect(".a", NUMBER, required = true);
json.expect(".b", STRING, required = true);
}
sub vcl_recv {
if (! json.parse_body(req_body, 1K)) {
return (synth(400, "parse error"));
}
return (synth(200));
}
sub vcl_synth {
set resp.http.content-type = "application/json";
set resp.body = "[" + json.extract(".a") + "," +
{"""} + json.extract(".b") + {""]"};
return (deliver);
}
$Object parser(INT depth=10)
Instantiate a JSON parser object.
......@@ -116,6 +142,27 @@ For expected paths, the first match is recorded.
Details on parse errors are logged as ``VCL_Error``
$Method BOOL .parse_body(ENUM {req_body, bereq_body, resp_body }
which, BYTES maxsize)
Same as `xfrozen.parse()`_, but parse the body given as the *which*
argument, of maximum size *maxsize*.
Note that, unless the body is stored as a single, durable
(non-transient) segment, a full copy of the body is made on the heap
memory using :ref:`realloc(3)`. This is due to the fact that the
*frozen* parser used by this project does not support parsing
discontinuous segmented data. Thus, be mindful of memory requirements
when parsing large JSON objects. The *maxsize* argument limits the
maximum amount of additional heap memory used. If it is exceeded,
`xfrozen.parse_body()`_ returns ``false`` and an appropriate error is
logged.
When used with a ``req_body`` or ``bereq_body`` *which*
argument, this method consumes the request body. If it is to
be used again (for example, to send it to a backend), it
should first be cached by calling ``std.cache_req_body(<size>)``.
$Method STRING .extract(STRING path, STRING null = "", STRING undef = "")
After a successful `.parse()`, extract the given path, which must have
......
varnishtest "example from readme"
# VCL of the README example: parse the request body as JSON and answer
# with a synthetic response which returns two of its members as an array
varnish v1 -vcl+backend {
import frozen;
import std;
backend proforma none;
# the parser requires a NUMBER at .a and a STRING at .b
sub vcl_init {
new json = frozen.parser();
json.expect(".a", NUMBER, required = true);
json.expect(".b", STRING, required = true);
}
# parse the request body with a limit of 1K; 400 if that fails
sub vcl_recv {
if (! json.parse_body(req_body, 1K)) {
return (synth(400, "parse error"));
}
return (synth(200));
}
# build the response body from the extracted values: [<.a>,"<.b>"]
sub vcl_synth {
set resp.http.content-type = "application/json";
set resp.body = "[" + json.extract(".a") + "," +
{"""} + json.extract(".b") + {""]"};
return (deliver);
}
} -start
# req.body uncached: the JSON document is sent as one chunk of a chunked
# request body; the response body is expected to be the array built from
# the members .a and .b
client c1 {
txreq -url "/t" -nolen -hdr "Transfer-Encoding: chunked"
chunked "{ a: 1, b: \"hi there\", c: true} "
chunkedlen 0
rxresp
expect resp.status == 200
expect resp.body == "[1,\"hi there\"]"
} -run
......@@ -2,11 +2,16 @@ varnishtest "test vmod-frozen"
server s1 {
rxreq
txresp
txresp -nolen -hdr "Transfer-Encoding: chunked"
chunked "{ a: 1, b: \"hi there\", c: true, d: false, "
chunked " e : null, f: [ 1, -2, 3], g: { \"1\": [], h"
chunked ": [ 7 ] } } "
chunkedlen 0
} -start
varnish v1 -vcl+backend {
import frozen;
import std;
sub vcl_init {
new p_test = frozen.parser();
......@@ -26,6 +31,12 @@ varnish v1 -vcl+backend {
p_test.expect(".g");
}
sub vcl_recv {
if (req.url == "/resp.body") {
return (hash);
}
if (req.http.cache) {
std.cache_req_body(std.bytes(req.http.max));
}
return (synth(200));
}
sub extract {
......@@ -73,10 +84,23 @@ varnish v1 -vcl+backend {
{" e : null, f: [ 1, -2, 3], g: { "1": [], h"} +
{": [ 7 ] } } "});
}
else if (req.url == "/req.body") {
set resp.http.parse = p_test.parse_body(req_body,
std.bytes(req.http.max));
}
call extract;
}
sub vcl_deliver {
set resp.http.fail = p_test.parse(req.http.not);
if (req.url == "/resp.body") {
set resp.http.parse = p_test.parse_body(resp_body,
std.bytes(req.http.max));
}
call extract;
}
} -start
# vcl string
client c1 {
txreq -url "/static"
rxresp
......@@ -105,4 +129,154 @@ client c1 {
expect resp.http.gh0-type == NUMBER
expect resp.http.g == {{ "1": [], h: [ 7 ] }}
expect resp.http.g-type == OBJECT
} -run
} -start
# req.body uncached: no "cache" request header is sent, so vcl_recv does
# not call std.cache_req_body(). The JSON document is split over three
# chunks and the "max" header (1K) is used as the parse_body() limit.
client c2 {
txreq -url "/req.body" -hdr "max: 1K" \
-nolen -hdr "Transfer-Encoding: chunked"
chunked "{ a: 1, b: \"hi there\", c: true, d: false, "
chunked " e : null, f: [ 1, -2, 3], g: { \"1\": [], h"
chunked ": [ 7 ] } } "
chunkedlen 0
rxresp
expect resp.status == 200
# resp.http.fail is the result of p_test.parse(req.http.not) in vcl_deliver
expect resp.http.fail == false
# resp.http.parse is the result of parse_body()
expect resp.http.parse == true
# extracted values and their types
expect resp.http.a == 1
expect resp.http.a-type == NUMBER
expect resp.http.b == "hi there"
expect resp.http.b-type == STRING
expect resp.http.c == true
expect resp.http.c-type == BOOL
expect resp.http.d == false
expect resp.http.d-type == BOOL
expect resp.http.e == "<null>"
expect resp.http.e-type == ""
expect resp.http.f0 == 1
expect resp.http.f0-type == NUMBER
expect resp.http.f1 == -2
expect resp.http.f1-type == NUMBER
expect resp.http.f == "[ 1, -2, 3]"
expect resp.http.f-type == ARRAY
expect resp.http.g1 == "[]"
expect resp.http.g1-type == ARRAY
expect resp.http.gh0 == 7
expect resp.http.gh0-type == NUMBER
expect resp.http.g == {{ "1": [], h: [ 7 ] }}
expect resp.http.g-type == OBJECT
} -start
# req.body cached: the "cache" request header makes vcl_recv call
# std.cache_req_body() with the size from the "max" header before the
# body is parsed. Same document and expectations as for the uncached case.
client c3 {
txreq -url "/req.body" -hdr "cache: true" -hdr "max: 1K" \
-nolen -hdr "Transfer-Encoding: chunked"
chunked "{ a: 1, b: \"hi there\", c: true, d: false, "
chunked " e : null, f: [ 1, -2, 3], g: { \"1\": [], h"
chunked ": [ 7 ] } } "
chunkedlen 0
rxresp
expect resp.status == 200
# resp.http.fail is the result of p_test.parse(req.http.not) in vcl_deliver
expect resp.http.fail == false
# resp.http.parse is the result of parse_body()
expect resp.http.parse == true
# extracted values and their types
expect resp.http.a == 1
expect resp.http.a-type == NUMBER
expect resp.http.b == "hi there"
expect resp.http.b-type == STRING
expect resp.http.c == true
expect resp.http.c-type == BOOL
expect resp.http.d == false
expect resp.http.d-type == BOOL
expect resp.http.e == "<null>"
expect resp.http.e-type == ""
expect resp.http.f0 == 1
expect resp.http.f0-type == NUMBER
expect resp.http.f1 == -2
expect resp.http.f1-type == NUMBER
expect resp.http.f == "[ 1, -2, 3]"
expect resp.http.f-type == ARRAY
expect resp.http.g1 == "[]"
expect resp.http.g1-type == ARRAY
expect resp.http.gh0 == 7
expect resp.http.gh0-type == NUMBER
expect resp.http.g == {{ "1": [], h: [ 7 ] }}
expect resp.http.g-type == OBJECT
} -start
# resp.body: no request body is sent. For this URL vcl_recv returns hash,
# so the response comes from server s1, which sends the JSON document in
# three chunks; vcl_deliver then calls parse_body(resp_body, ...) with the
# limit from the "max" header.
client c4 {
txreq -url "/resp.body" -hdr "max: 1K"
rxresp
expect resp.status == 200
# resp.http.fail is the result of p_test.parse(req.http.not) in vcl_deliver
expect resp.http.fail == false
# resp.http.parse is the result of parse_body()
expect resp.http.parse == true
# extracted values and their types
expect resp.http.a == 1
expect resp.http.a-type == NUMBER
expect resp.http.b == "hi there"
expect resp.http.b-type == STRING
expect resp.http.c == true
expect resp.http.c-type == BOOL
expect resp.http.d == false
expect resp.http.d-type == BOOL
expect resp.http.e == "<null>"
expect resp.http.e-type == ""
expect resp.http.f0 == 1
expect resp.http.f0-type == NUMBER
expect resp.http.f1 == -2
expect resp.http.f1-type == NUMBER
expect resp.http.f == "[ 1, -2, 3]"
expect resp.http.f-type == ARRAY
expect resp.http.g1 == "[]"
expect resp.http.g1-type == ARRAY
expect resp.http.gh0 == 7
expect resp.http.gh0-type == NUMBER
expect resp.http.g == {{ "1": [], h: [ 7 ] }}
expect resp.http.g-type == OBJECT
} -start
########################################
## failure modes
# maxbytes exceeded: for the /req.body request carrying "max: 10B", expect
# the VCL_Error which parse_body() logs when the limit is exceeded
logexpect l11 -v v1 -q "ReqURL ~ \"^/req.body\" and ReqHeader:max ~ \"^10B\"" {
expect * * ReqURL {^/req.body}
expect * = VCL_Error {^xfrozen.parse_body.maxbytes = 10. exceeded}
} -start
# The chunked request body is larger than the 10 byte limit given in the
# "max" header, so parse_body() is expected to fail (see logexpect l11).
# NOTE(review): the 500 status is presumably caused by a VCL failure
# following the unsuccessful parse; the code raising it is not part of
# what is visible here - confirm.
client c11 {
txreq -url "/req.body" -hdr "max: 10B" \
-nolen -hdr "Transfer-Encoding: chunked"
chunked "{ a: 1, b: \"hi there\", c: true, d: false, "
chunked " e : null, f: [ 1, -2, 3], g: { \"1\": [], h"
chunked ": [ 7 ] } } "
chunkedlen 0
rxresp
expect resp.status == 500
} -start
# no req.body: for the /req.body request carrying "max: 1B", expect a
# VCL_Error reporting that there is no state from .parse()
# NOTE(review): this message is not emitted by parse_body() itself;
# presumably it comes from the later extract calls - confirm.
logexpect l12 -v v1 -q "ReqURL ~ \"^/req.body\" and ReqHeader:max ~ \"^1B\"" {
expect * * ReqURL {^/req.body}
expect * = VCL_Error {^no state from .parse()}
} -start
# The request is sent without any body; the transaction is expected to
# end with a 500 status (see logexpect l12 for the error looked for).
client c12 {
txreq -url "/req.body" -hdr "max: 1B"
rxresp
expect resp.status == 500
} -start
# the clients and logexpects above were launched with -start; wait for
# each of them to finish
client c1 -wait
client c2 -wait
client c3 -wait
client c4 -wait
client c11 -wait
client c12 -wait
logexpect l11 -wait
logexpect l12 -wait
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment